hv_storvsc_drv_freebsd.c revision 307617
1/*-
2 * Copyright (c) 2009-2012,2016 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/**
30 * StorVSC driver for Hyper-V.  This driver presents a SCSI HBA interface
 * to the Common Access Method (CAM) layer.  CAM control blocks (CCBs) are
32 * converted into VSCSI protocol messages which are delivered to the parent
33 * partition StorVSP driver over the Hyper-V VMBUS.
34 */
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: stable/11/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c 307617 2016-10-19 07:43:39Z sephe $");
37
38#include <sys/param.h>
39#include <sys/proc.h>
40#include <sys/condvar.h>
41#include <sys/time.h>
42#include <sys/systm.h>
43#include <sys/sysctl.h>
44#include <sys/sockio.h>
45#include <sys/mbuf.h>
46#include <sys/malloc.h>
47#include <sys/module.h>
48#include <sys/kernel.h>
49#include <sys/queue.h>
50#include <sys/lock.h>
51#include <sys/sx.h>
52#include <sys/taskqueue.h>
53#include <sys/bus.h>
54#include <sys/mutex.h>
55#include <sys/callout.h>
56#include <sys/smp.h>
57#include <vm/vm.h>
58#include <vm/pmap.h>
59#include <vm/uma.h>
60#include <sys/lock.h>
61#include <sys/sema.h>
62#include <sys/sglist.h>
63#include <sys/eventhandler.h>
64#include <machine/bus.h>
65#include <sys/bus_dma.h>
66
67#include <cam/cam.h>
68#include <cam/cam_ccb.h>
69#include <cam/cam_periph.h>
70#include <cam/cam_sim.h>
71#include <cam/cam_xpt_sim.h>
72#include <cam/cam_xpt_internal.h>
73#include <cam/cam_debug.h>
74#include <cam/scsi/scsi_all.h>
75#include <cam/scsi/scsi_message.h>
76
77#include <dev/hyperv/include/hyperv.h>
78#include <dev/hyperv/include/vmbus.h>
79#include "hv_vstorage.h"
80#include "vmbus_if.h"
81
82#define STORVSC_MAX_LUNS_PER_TARGET	(64)
83#define STORVSC_MAX_IO_REQUESTS		(STORVSC_MAX_LUNS_PER_TARGET * 2)
84#define BLKVSC_MAX_IDE_DISKS_PER_TARGET	(1)
85#define BLKVSC_MAX_IO_REQUESTS		STORVSC_MAX_IO_REQUESTS
86#define STORVSC_MAX_TARGETS		(2)
87
88#define VSTOR_PKT_SIZE	(sizeof(struct vstor_packet) - vmscsi_size_delta)
89
90/*
91 * 33 segments are needed to allow 128KB maxio, in case the data
92 * in the first page is _not_ PAGE_SIZE aligned, e.g.
93 *
94 *     |<----------- 128KB ----------->|
95 *     |                               |
96 *  0  2K 4K    8K   16K   124K  128K  130K
97 *  |  |  |     |     |       |     |  |
98 *  +--+--+-----+-----+.......+-----+--+--+
99 *  |  |  |     |     |       |     |  |  | DATA
100 *  |  |  |     |     |       |     |  |  |
101 *  +--+--+-----+-----+.......------+--+--+
102 *     |  |                         |  |
103 *     | 1|            31           | 1| ...... # of segments
104 */
105#define STORVSC_DATA_SEGCNT_MAX		33
106#define STORVSC_DATA_SEGSZ_MAX		PAGE_SIZE
107#define STORVSC_DATA_SIZE_MAX		\
108	((STORVSC_DATA_SEGCNT_MAX - 1) * STORVSC_DATA_SEGSZ_MAX)
109
110struct storvsc_softc;
111
112struct hv_sgl_node {
113	LIST_ENTRY(hv_sgl_node) link;
114	struct sglist *sgl_data;
115};
116
117struct hv_sgl_page_pool{
118	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;
119	LIST_HEAD(, hv_sgl_node) free_sgl_list;
120	boolean_t                is_init;
121} g_hv_sgl_page_pool;
122
123enum storvsc_request_type {
124	WRITE_TYPE,
125	READ_TYPE,
126	UNKNOWN_TYPE
127};
128
129SYSCTL_NODE(_hw, OID_AUTO, storvsc, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
130	"Hyper-V storage interface");
131
132static u_int hv_storvsc_use_win8ext_flags = 1;
133SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_win8ext_flags, CTLFLAG_RW,
134	&hv_storvsc_use_win8ext_flags, 0,
135	"Use win8 extension flags or not");
136
137static u_int hv_storvsc_use_pim_unmapped = 1;
138SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_pim_unmapped, CTLFLAG_RDTUN,
139	&hv_storvsc_use_pim_unmapped, 0,
140	"Optimize storvsc by using unmapped I/O");
141
142static u_int hv_storvsc_ringbuffer_size = (64 * PAGE_SIZE);
143SYSCTL_UINT(_hw_storvsc, OID_AUTO, ringbuffer_size, CTLFLAG_RDTUN,
144	&hv_storvsc_ringbuffer_size, 0, "Hyper-V storage ringbuffer size");
145
146static u_int hv_storvsc_max_io = 512;
147SYSCTL_UINT(_hw_storvsc, OID_AUTO, max_io, CTLFLAG_RDTUN,
148	&hv_storvsc_max_io, 0, "Hyper-V storage max io limit");
149
150#define STORVSC_MAX_IO						\
151	vmbus_chan_prplist_nelem(hv_storvsc_ringbuffer_size,	\
152	   STORVSC_DATA_SEGCNT_MAX, VSTOR_PKT_SIZE)
153
154struct hv_storvsc_sysctl {
155	u_long		data_bio_cnt;
156	u_long		data_vaddr_cnt;
157	u_long		data_sg_cnt;
158	u_long		chan_send_cnt[MAXCPU];
159};
160
/*
 * Guest-physical-address range for one I/O, with room for up to
 * STORVSC_DATA_SEGCNT_MAX (33) page frame numbers.
 */
struct storvsc_gpa_range {
	struct vmbus_gpa_range	gpa_range;	/* range header (start/len) */
	uint64_t		gpa_page[STORVSC_DATA_SEGCNT_MAX]; /* PFNs */
} __packed;

/*
 * Per-I/O request state.  Requests are pre-allocated by
 * storvsc_init_requests() and kept on the softc's hs_free_list
 * between uses.
 */
struct hv_storvsc_request {
	LIST_ENTRY(hv_storvsc_request)	link;	/* hs_free_list linkage */
	struct vstor_packet		vstor_packet;	/* wire-format packet */
	int				prp_cnt;	/* # of pages in prp_list */
	struct storvsc_gpa_range	prp_list;	/* data pages for transfer */
	void				*sense_data;	/* autosense destination */
	uint8_t				sense_info_len;	/* valid bytes in sense_data */
	uint8_t				retries;
	union ccb			*ccb;		/* originating CAM CCB */
	struct storvsc_softc		*softc;		/* back pointer to adapter */
	struct callout			callout;	/* timeout handling (usage not shown in this chunk) */
	struct sema			synch_sema; /*Synchronize the request/response if needed */
	struct sglist			*bounce_sgl;	/* bounce-buffer SG list */
	unsigned int			bounce_sgl_count;
	uint64_t			not_aligned_seg_bits;
	bus_dmamap_t			data_dmap;	/* DMA map for the data buffer */
};
183
/*
 * Per-adapter driver state (device softc).
 */
struct storvsc_softc {
	struct vmbus_channel		*hs_chan;	/* primary VMBUS channel */
	LIST_HEAD(, hv_storvsc_request)	hs_free_list;	/* pre-allocated requests */
	struct mtx			hs_lock;	/* dropped across channel sends */
	struct storvsc_driver_props	*hs_drv_props;	/* blkvsc vs. storvsc limits */
	int 				hs_unit;
	uint32_t			hs_frozen;	/* NOTE(review): presumably CAM queue-freeze state; not used in this chunk */
	struct cam_sim			*hs_sim;
	struct cam_path 		*hs_path;
	uint32_t			hs_num_out_reqs; /* outstanding I/O count */
	boolean_t			hs_destroy;
	boolean_t			hs_drain_notify; /* post hs_drain_sema when out_reqs hits 0 */
	struct sema 			hs_drain_sema;
	struct hv_storvsc_request	hs_init_req;	/* reserved for init handshake */
	struct hv_storvsc_request	hs_reset_req;	/* reserved for bus reset */
	device_t			hs_dev;
	bus_dma_tag_t			storvsc_req_dtag; /* DMA tag for request data */
	struct hv_storvsc_sysctl	sysctl_data;	/* sysctl counters */
	uint32_t			hs_nchan;	/* total channels (primary + sub) */
	struct vmbus_channel		*hs_sel_chan[MAXCPU]; /* channel selection table */
};
205
206static eventhandler_tag storvsc_handler_tag;
207/*
208 * The size of the vmscsi_request has changed in win8. The
209 * additional size is for the newly added elements in the
210 * structure. These elements are valid only when we are talking
211 * to a win8 host.
212 * Track the correct size we need to apply.
213 */
214static int vmscsi_size_delta = sizeof(struct vmscsi_win8_extension);
215
216/**
217 * HyperV storvsc timeout testing cases:
218 * a. IO returned after first timeout;
219 * b. IO returned after second timeout and queue freeze;
220 * c. IO returned while timer handler is running
221 * The first can be tested by "sg_senddiag -vv /dev/daX",
222 * and the second and third can be done by
223 * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
224 */
225#define HVS_TIMEOUT_TEST 0
226
227/*
228 * Bus/adapter reset functionality on the Hyper-V host is
229 * buggy and it will be disabled until
230 * it can be further tested.
231 */
232#define HVS_HOST_RESET 0
233
234struct storvsc_driver_props {
235	char		*drv_name;
236	char		*drv_desc;
237	uint8_t		drv_max_luns_per_target;
238	uint32_t	drv_max_ios_per_target;
239	uint32_t	drv_ringbuffer_size;
240};
241
242enum hv_storage_type {
243	DRIVER_BLKVSC,
244	DRIVER_STORVSC,
245	DRIVER_UNKNOWN
246};
247
248#define HS_MAX_ADAPTERS 10
249
250#define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1
251
252/* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
253static const struct hyperv_guid gStorVscDeviceType={
254	.hv_guid = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
255		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
256};
257
258/* {32412632-86cb-44a2-9b5c-50d1417354f5} */
259static const struct hyperv_guid gBlkVscDeviceType={
260	.hv_guid = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
261		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
262};
263
264static struct storvsc_driver_props g_drv_props_table[] = {
265	{"blkvsc", "Hyper-V IDE Storage Interface",
266	 BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
267	 20*PAGE_SIZE},
268	{"storvsc", "Hyper-V SCSI Storage Interface",
269	 STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
270	 20*PAGE_SIZE}
271};
272
273/*
274 * Sense buffer size changed in win8; have a run-time
275 * variable to track the size we should use.
276 */
277static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;
278
279/*
280 * The storage protocol version is determined during the
281 * initial exchange with the host.  It will indicate which
282 * storage functionality is available in the host.
283*/
284static int vmstor_proto_version;
285
286struct vmstor_proto {
287        int proto_version;
288        int sense_buffer_size;
289        int vmscsi_size_delta;
290};
291
292static const struct vmstor_proto vmstor_proto_list[] = {
293        {
294                VMSTOR_PROTOCOL_VERSION_WIN10,
295                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
296                0
297        },
298        {
299                VMSTOR_PROTOCOL_VERSION_WIN8_1,
300                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
301                0
302        },
303        {
304                VMSTOR_PROTOCOL_VERSION_WIN8,
305                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
306                0
307        },
308        {
309                VMSTOR_PROTOCOL_VERSION_WIN7,
310                PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
311                sizeof(struct vmscsi_win8_extension),
312        },
313        {
314                VMSTOR_PROTOCOL_VERSION_WIN6,
315                PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
316                sizeof(struct vmscsi_win8_extension),
317        }
318};
319
320/* static functions */
321static int storvsc_probe(device_t dev);
322static int storvsc_attach(device_t dev);
323static int storvsc_detach(device_t dev);
324static void storvsc_poll(struct cam_sim * sim);
325static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
326static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
327static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
328static enum hv_storage_type storvsc_get_storage_type(device_t dev);
329static void hv_storvsc_rescan_target(struct storvsc_softc *sc);
330static void hv_storvsc_on_channel_callback(struct vmbus_channel *chan, void *xsc);
331static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
332					struct vstor_packet *vstor_packet,
333					struct hv_storvsc_request *request);
334static int hv_storvsc_connect_vsp(struct storvsc_softc *);
335static void storvsc_io_done(struct hv_storvsc_request *reqp);
336static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
337				bus_dma_segment_t *orig_sgl,
338				unsigned int orig_sgl_count,
339				uint64_t seg_bits);
340void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
341				unsigned int dest_sgl_count,
342				struct sglist* src_sgl,
343				uint64_t seg_bits);
344
345static device_method_t storvsc_methods[] = {
346	/* Device interface */
347	DEVMETHOD(device_probe,		storvsc_probe),
348	DEVMETHOD(device_attach,	storvsc_attach),
349	DEVMETHOD(device_detach,	storvsc_detach),
350	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
351	DEVMETHOD_END
352};
353
354static driver_t storvsc_driver = {
355	"storvsc", storvsc_methods, sizeof(struct storvsc_softc),
356};
357
358static devclass_t storvsc_devclass;
359DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0);
360MODULE_VERSION(storvsc, 1);
361MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
362
363static void
364storvsc_subchan_attach(struct storvsc_softc *sc,
365    struct vmbus_channel *new_channel)
366{
367	struct vmstor_chan_props props;
368	int ret = 0;
369
370	memset(&props, 0, sizeof(props));
371
372	vmbus_chan_cpu_rr(new_channel);
373	ret = vmbus_chan_open(new_channel,
374	    sc->hs_drv_props->drv_ringbuffer_size,
375  	    sc->hs_drv_props->drv_ringbuffer_size,
376	    (void *)&props,
377	    sizeof(struct vmstor_chan_props),
378	    hv_storvsc_on_channel_callback, sc);
379}
380
381/**
382 * @brief Send multi-channel creation request to host
383 *
384 * @param device  a Hyper-V device pointer
385 * @param max_chans  the max channels supported by vmbus
386 */
387static void
388storvsc_send_multichannel_request(struct storvsc_softc *sc, int max_chans)
389{
390	struct vmbus_channel **subchan;
391	struct hv_storvsc_request *request;
392	struct vstor_packet *vstor_packet;
393	int request_channels_cnt = 0;
394	int ret, i;
395
396	/* get multichannels count that need to create */
397	request_channels_cnt = MIN(max_chans, mp_ncpus);
398
399	request = &sc->hs_init_req;
400
401	/* request the host to create multi-channel */
402	memset(request, 0, sizeof(struct hv_storvsc_request));
403
404	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
405
406	vstor_packet = &request->vstor_packet;
407
408	vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
409	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
410	vstor_packet->u.multi_channels_cnt = request_channels_cnt;
411
412	ret = vmbus_chan_send(sc->hs_chan,
413	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
414	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
415
416	/* wait for 5 seconds */
417	ret = sema_timedwait(&request->synch_sema, 5 * hz);
418	if (ret != 0) {
419		printf("Storvsc_error: create multi-channel timeout, %d\n",
420		    ret);
421		return;
422	}
423
424	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
425	    vstor_packet->status != 0) {
426		printf("Storvsc_error: create multi-channel invalid operation "
427		    "(%d) or statue (%u)\n",
428		    vstor_packet->operation, vstor_packet->status);
429		return;
430	}
431
432	/* Update channel count */
433	sc->hs_nchan = request_channels_cnt + 1;
434
435	/* Wait for sub-channels setup to complete. */
436	subchan = vmbus_subchan_get(sc->hs_chan, request_channels_cnt);
437
438	/* Attach the sub-channels. */
439	for (i = 0; i < request_channels_cnt; ++i)
440		storvsc_subchan_attach(sc, subchan[i]);
441
442	/* Release the sub-channels. */
443	vmbus_subchan_rel(subchan, request_channels_cnt);
444
445	if (bootverbose)
446		printf("Storvsc create multi-channel success!\n");
447}
448
449/**
450 * @brief initialize channel connection to parent partition
451 *
452 * @param dev  a Hyper-V device pointer
453 * @returns  0 on success, non-zero error on failure
454 */
455static int
456hv_storvsc_channel_init(struct storvsc_softc *sc)
457{
458	int ret = 0, i;
459	struct hv_storvsc_request *request;
460	struct vstor_packet *vstor_packet;
461	uint16_t max_chans = 0;
462	boolean_t support_multichannel = FALSE;
463	uint32_t version;
464
465	max_chans = 0;
466	support_multichannel = FALSE;
467
468	request = &sc->hs_init_req;
469	memset(request, 0, sizeof(struct hv_storvsc_request));
470	vstor_packet = &request->vstor_packet;
471	request->softc = sc;
472
473	/**
474	 * Initiate the vsc/vsp initialization protocol on the open channel
475	 */
476	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
477
478	vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
479	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
480
481
482	ret = vmbus_chan_send(sc->hs_chan,
483	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
484	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
485
486	if (ret != 0)
487		goto cleanup;
488
489	/* wait 5 seconds */
490	ret = sema_timedwait(&request->synch_sema, 5 * hz);
491	if (ret != 0)
492		goto cleanup;
493
494	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
495		vstor_packet->status != 0) {
496		goto cleanup;
497	}
498
499	for (i = 0; i < nitems(vmstor_proto_list); i++) {
500		/* reuse the packet for version range supported */
501
502		memset(vstor_packet, 0, sizeof(struct vstor_packet));
503		vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
504		vstor_packet->flags = REQUEST_COMPLETION_FLAG;
505
506		vstor_packet->u.version.major_minor =
507			vmstor_proto_list[i].proto_version;
508
509		/* revision is only significant for Windows guests */
510		vstor_packet->u.version.revision = 0;
511
512		ret = vmbus_chan_send(sc->hs_chan,
513		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
514		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
515
516		if (ret != 0)
517			goto cleanup;
518
519		/* wait 5 seconds */
520		ret = sema_timedwait(&request->synch_sema, 5 * hz);
521
522		if (ret)
523			goto cleanup;
524
525		if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) {
526			ret = EINVAL;
527			goto cleanup;
528		}
529		if (vstor_packet->status == 0) {
530			vmstor_proto_version =
531				vmstor_proto_list[i].proto_version;
532			sense_buffer_size =
533				vmstor_proto_list[i].sense_buffer_size;
534			vmscsi_size_delta =
535				vmstor_proto_list[i].vmscsi_size_delta;
536			break;
537		}
538	}
539
540	if (vstor_packet->status != 0) {
541		ret = EINVAL;
542		goto cleanup;
543	}
544	/**
545	 * Query channel properties
546	 */
547	memset(vstor_packet, 0, sizeof(struct vstor_packet));
548	vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
549	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
550
551	ret = vmbus_chan_send(sc->hs_chan,
552	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
553	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
554
555	if ( ret != 0)
556		goto cleanup;
557
558	/* wait 5 seconds */
559	ret = sema_timedwait(&request->synch_sema, 5 * hz);
560
561	if (ret != 0)
562		goto cleanup;
563
564	/* TODO: Check returned version */
565	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
566	    vstor_packet->status != 0) {
567		goto cleanup;
568	}
569
570	/* multi-channels feature is supported by WIN8 and above version */
571	max_chans = vstor_packet->u.chan_props.max_channel_cnt;
572	version = VMBUS_GET_VERSION(device_get_parent(sc->hs_dev), sc->hs_dev);
573	if (version != VMBUS_VERSION_WIN7 && version != VMBUS_VERSION_WS2008 &&
574	    (vstor_packet->u.chan_props.flags &
575	     HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
576		support_multichannel = TRUE;
577	}
578
579	memset(vstor_packet, 0, sizeof(struct vstor_packet));
580	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
581	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
582
583	ret = vmbus_chan_send(sc->hs_chan,
584	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
585	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
586
587	if (ret != 0) {
588		goto cleanup;
589	}
590
591	/* wait 5 seconds */
592	ret = sema_timedwait(&request->synch_sema, 5 * hz);
593
594	if (ret != 0)
595		goto cleanup;
596
597	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
598	    vstor_packet->status != 0)
599		goto cleanup;
600
601	/*
602	 * If multi-channel is supported, send multichannel create
603	 * request to host.
604	 */
605	if (support_multichannel)
606		storvsc_send_multichannel_request(sc, max_chans);
607cleanup:
608	sema_destroy(&request->synch_sema);
609	return (ret);
610}
611
612/**
613 * @brief Open channel connection to paraent partition StorVSP driver
614 *
615 * Open and initialize channel connection to parent partition StorVSP driver.
616 *
617 * @param pointer to a Hyper-V device
618 * @returns 0 on success, non-zero error on failure
619 */
620static int
621hv_storvsc_connect_vsp(struct storvsc_softc *sc)
622{
623	int ret = 0;
624	struct vmstor_chan_props props;
625
626	memset(&props, 0, sizeof(struct vmstor_chan_props));
627
628	/*
629	 * Open the channel
630	 */
631	vmbus_chan_cpu_rr(sc->hs_chan);
632	ret = vmbus_chan_open(
633		sc->hs_chan,
634		sc->hs_drv_props->drv_ringbuffer_size,
635		sc->hs_drv_props->drv_ringbuffer_size,
636		(void *)&props,
637		sizeof(struct vmstor_chan_props),
638		hv_storvsc_on_channel_callback, sc);
639
640	if (ret != 0) {
641		return ret;
642	}
643
644	ret = hv_storvsc_channel_init(sc);
645	return (ret);
646}
647
648#if HVS_HOST_RESET
649static int
650hv_storvsc_host_reset(struct storvsc_softc *sc)
651{
652	int ret = 0;
653
654	struct hv_storvsc_request *request;
655	struct vstor_packet *vstor_packet;
656
657	request = &sc->hs_reset_req;
658	request->softc = sc;
659	vstor_packet = &request->vstor_packet;
660
661	sema_init(&request->synch_sema, 0, "stor synch sema");
662
663	vstor_packet->operation = VSTOR_OPERATION_RESETBUS;
664	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
665
666	ret = vmbus_chan_send(dev->channel,
667	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
668	    vstor_packet, VSTOR_PKT_SIZE,
669	    (uint64_t)(uintptr_t)&sc->hs_reset_req);
670
671	if (ret != 0) {
672		goto cleanup;
673	}
674
675	ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */
676
677	if (ret) {
678		goto cleanup;
679	}
680
681
682	/*
683	 * At this point, all outstanding requests in the adapter
684	 * should have been flushed out and return to us
685	 */
686
687cleanup:
688	sema_destroy(&request->synch_sema);
689	return (ret);
690}
691#endif /* HVS_HOST_RESET */
692
693/**
694 * @brief Function to initiate an I/O request
695 *
696 * @param device Hyper-V device pointer
697 * @param request pointer to a request structure
698 * @returns 0 on success, non-zero error on failure
699 */
700static int
701hv_storvsc_io_request(struct storvsc_softc *sc,
702					  struct hv_storvsc_request *request)
703{
704	struct vstor_packet *vstor_packet = &request->vstor_packet;
705	struct vmbus_channel* outgoing_channel = NULL;
706	int ret = 0, ch_sel;
707
708	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
709
710	vstor_packet->u.vm_srb.length =
711	    sizeof(struct vmscsi_req) - vmscsi_size_delta;
712
713	vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size;
714
715	vstor_packet->u.vm_srb.transfer_len =
716	    request->prp_list.gpa_range.gpa_len;
717
718	vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
719
720	ch_sel = (vstor_packet->u.vm_srb.lun + curcpu) % sc->hs_nchan;
721	outgoing_channel = sc->hs_sel_chan[ch_sel];
722
723	mtx_unlock(&request->softc->hs_lock);
724	if (request->prp_list.gpa_range.gpa_len) {
725		ret = vmbus_chan_send_prplist(outgoing_channel,
726		    &request->prp_list.gpa_range, request->prp_cnt,
727		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
728	} else {
729		ret = vmbus_chan_send(outgoing_channel,
730		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
731		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
732	}
733	/* statistic for successful request sending on each channel */
734	if (!ret) {
735		sc->sysctl_data.chan_send_cnt[ch_sel]++;
736	}
737	mtx_lock(&request->softc->hs_lock);
738
739	if (ret != 0) {
740		printf("Unable to send packet %p ret %d", vstor_packet, ret);
741	} else {
742		atomic_add_int(&sc->hs_num_out_reqs, 1);
743	}
744
745	return (ret);
746}
747
748
749/**
750 * Process IO_COMPLETION_OPERATION and ready
751 * the result to be completed for upper layer
752 * processing by the CAM layer.
753 */
754static void
755hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
756			   struct vstor_packet *vstor_packet,
757			   struct hv_storvsc_request *request)
758{
759	struct vmscsi_req *vm_srb;
760
761	vm_srb = &vstor_packet->u.vm_srb;
762
763	/*
764	 * Copy some fields of the host's response into the request structure,
765	 * because the fields will be used later in storvsc_io_done().
766	 */
767	request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
768	request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status;
769	request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;
770
771	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
772			(vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
773		/* Autosense data available */
774
775		KASSERT(vm_srb->sense_info_len <= request->sense_info_len,
776				("vm_srb->sense_info_len <= "
777				 "request->sense_info_len"));
778
779		memcpy(request->sense_data, vm_srb->u.sense_data,
780			vm_srb->sense_info_len);
781
782		request->sense_info_len = vm_srb->sense_info_len;
783	}
784
785	/* Complete request by passing to the CAM layer */
786	storvsc_io_done(request);
787	atomic_subtract_int(&sc->hs_num_out_reqs, 1);
788	if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) {
789		sema_post(&sc->hs_drain_sema);
790	}
791}
792
793static void
794hv_storvsc_rescan_target(struct storvsc_softc *sc)
795{
796	path_id_t pathid;
797	target_id_t targetid;
798	union ccb *ccb;
799
800	pathid = cam_sim_path(sc->hs_sim);
801	targetid = CAM_TARGET_WILDCARD;
802
803	/*
804	 * Allocate a CCB and schedule a rescan.
805	 */
806	ccb = xpt_alloc_ccb_nowait();
807	if (ccb == NULL) {
808		printf("unable to alloc CCB for rescan\n");
809		return;
810	}
811
812	if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid,
813	    CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
814		printf("unable to create path for rescan, pathid: %u,"
815		    "targetid: %u\n", pathid, targetid);
816		xpt_free_ccb(ccb);
817		return;
818	}
819
820	if (targetid == CAM_TARGET_WILDCARD)
821		ccb->ccb_h.func_code = XPT_SCAN_BUS;
822	else
823		ccb->ccb_h.func_code = XPT_SCAN_TGT;
824
825	xpt_rescan(ccb);
826}
827
/*
 * VMBUS channel callback: drain completed packets from the channel and
 * dispatch each one.  Packets for the internal init/reset requests wake
 * their waiters; all others are VSTOR operations from the host.
 */
static void
hv_storvsc_on_channel_callback(struct vmbus_channel *channel, void *xsc)
{
	int ret = 0;
	struct storvsc_softc *sc = xsc;
	uint32_t bytes_recvd;
	uint64_t request_id;
	/* Receive buffer; rounded up because VMBUS packets are 8-byte units. */
	uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)];
	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;

	bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8);
	ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id);
	KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough"));
	/* XXX check bytes_recvd to make sure that it contains enough data */

	while ((ret == 0) && (bytes_recvd > 0)) {
		/* The transaction id is the request pointer we passed on send. */
		request = (struct hv_storvsc_request *)(uintptr_t)request_id;

		if ((request == &sc->hs_init_req) ||
			(request == &sc->hs_reset_req)) {
			/*
			 * Internal handshake/reset request: copy the reply
			 * into the request and wake the sleeping sender.
			 */
			memcpy(&request->vstor_packet, packet,
				   sizeof(struct vstor_packet));
			sema_post(&request->synch_sema);
		} else {
			vstor_packet = (struct vstor_packet *)packet;
			switch(vstor_packet->operation) {
			case VSTOR_OPERATION_COMPLETEIO:
				if (request == NULL)
					panic("VMBUS: storvsc received a "
					    "packet with NULL request id in "
					    "COMPLETEIO operation.");

				hv_storvsc_on_iocompletion(sc,
							vstor_packet, request);
				break;
			case VSTOR_OPERATION_REMOVEDEVICE:
				printf("VMBUS: storvsc operation %d not "
				    "implemented.\n", vstor_packet->operation);
				/* TODO: implement */
				break;
			case VSTOR_OPERATION_ENUMERATE_BUS:
				/* Host asks us to rescan for new LUNs. */
				hv_storvsc_rescan_target(sc);
				break;
			default:
				break;
			}
		}

		/* Reset the buffer size and poll for the next packet. */
		bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8),
		ret = vmbus_chan_recv(channel, packet, &bytes_recvd,
		    &request_id);
		KASSERT(ret != ENOBUFS,
		    ("storvsc recvbuf is not large enough"));
		/*
		 * XXX check bytes_recvd to make sure that it contains
		 * enough data
		 */
	}
}
888
889/**
890 * @brief StorVSC probe function
891 *
892 * Device probe function.  Returns 0 if the input device is a StorVSC
893 * device.  Otherwise, a ENXIO is returned.  If the input device is
894 * for BlkVSC (paravirtual IDE) device and this support is disabled in
895 * favor of the emulated ATA/IDE device, return ENXIO.
896 *
897 * @param a device
898 * @returns 0 on success, ENXIO if not a matcing StorVSC device
899 */
900static int
901storvsc_probe(device_t dev)
902{
903	int ret	= ENXIO;
904
905	switch (storvsc_get_storage_type(dev)) {
906	case DRIVER_BLKVSC:
907		if(bootverbose)
908			device_printf(dev,
909			    "Enlightened ATA/IDE detected\n");
910		device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc);
911		ret = BUS_PROBE_DEFAULT;
912		break;
913	case DRIVER_STORVSC:
914		if(bootverbose)
915			device_printf(dev, "Enlightened SCSI device detected\n");
916		device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc);
917		ret = BUS_PROBE_DEFAULT;
918		break;
919	default:
920		ret = ENXIO;
921	}
922	return (ret);
923}
924
925static void
926storvsc_create_chan_sel(struct storvsc_softc *sc)
927{
928	struct vmbus_channel **subch;
929	int i, nsubch;
930
931	sc->hs_sel_chan[0] = sc->hs_chan;
932	nsubch = sc->hs_nchan - 1;
933	if (nsubch == 0)
934		return;
935
936	subch = vmbus_subchan_get(sc->hs_chan, nsubch);
937	for (i = 0; i < nsubch; i++)
938		sc->hs_sel_chan[i + 1] = subch[i];
939	vmbus_subchan_rel(subch, nsubch);
940}
941
942static int
943storvsc_init_requests(device_t dev)
944{
945	struct storvsc_softc *sc = device_get_softc(dev);
946	struct hv_storvsc_request *reqp;
947	int error, i;
948
949	LIST_INIT(&sc->hs_free_list);
950
951	error = bus_dma_tag_create(
952		bus_get_dma_tag(dev),		/* parent */
953		1,				/* alignment */
954		PAGE_SIZE,			/* boundary */
955		BUS_SPACE_MAXADDR,		/* lowaddr */
956		BUS_SPACE_MAXADDR,		/* highaddr */
957		NULL, NULL,			/* filter, filterarg */
958		STORVSC_DATA_SIZE_MAX,		/* maxsize */
959		STORVSC_DATA_SEGCNT_MAX,	/* nsegments */
960		STORVSC_DATA_SEGSZ_MAX,		/* maxsegsize */
961		0,				/* flags */
962		NULL,				/* lockfunc */
963		NULL,				/* lockfuncarg */
964		&sc->storvsc_req_dtag);
965	if (error) {
966		device_printf(dev, "failed to create storvsc dma tag\n");
967		return (error);
968	}
969
970	for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
971		reqp = malloc(sizeof(struct hv_storvsc_request),
972				 M_DEVBUF, M_WAITOK|M_ZERO);
973		reqp->softc = sc;
974		error = bus_dmamap_create(sc->storvsc_req_dtag, 0,
975				&reqp->data_dmap);
976		if (error) {
977			device_printf(dev, "failed to allocate storvsc "
978			    "data dmamap\n");
979			goto cleanup;
980		}
981		LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
982	}
983	return (0);
984
985cleanup:
986	while ((reqp = LIST_FIRST(&sc->hs_free_list)) != NULL) {
987		LIST_REMOVE(reqp, link);
988		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
989		free(reqp, M_DEVBUF);
990	}
991	return (error);
992}
993
994static void
995storvsc_sysctl(device_t dev)
996{
997	struct sysctl_oid_list *child;
998	struct sysctl_ctx_list *ctx;
999	struct sysctl_oid *ch_tree, *chid_tree;
1000	struct storvsc_softc *sc;
1001	char name[16];
1002	int i;
1003
1004	sc = device_get_softc(dev);
1005	ctx = device_get_sysctl_ctx(dev);
1006	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
1007
1008	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_bio_cnt", CTLFLAG_RW,
1009		&sc->sysctl_data.data_bio_cnt, "# of bio data block");
1010	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_vaddr_cnt", CTLFLAG_RW,
1011		&sc->sysctl_data.data_vaddr_cnt, "# of vaddr data block");
1012	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_sg_cnt", CTLFLAG_RW,
1013		&sc->sysctl_data.data_sg_cnt, "# of sg data block");
1014
1015	/* dev.storvsc.UNIT.channel */
1016	ch_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "channel",
1017		CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1018	if (ch_tree == NULL)
1019		return;
1020
1021	for (i = 0; i < sc->hs_nchan; i++) {
1022		uint32_t ch_id;
1023
1024		ch_id = vmbus_chan_id(sc->hs_sel_chan[i]);
1025		snprintf(name, sizeof(name), "%d", ch_id);
1026		/* dev.storvsc.UNIT.channel.CHID */
1027		chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
1028			OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1029		if (chid_tree == NULL)
1030			return;
1031		/* dev.storvsc.UNIT.channel.CHID.send_req */
1032		SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
1033			"send_req", CTLFLAG_RD, &sc->sysctl_data.chan_send_cnt[i],
1034			"# of request sending from this channel");
1035	}
1036}
1037
1038/**
1039 * @brief StorVSC attach function
1040 *
1041 * Function responsible for allocating per-device structures,
1042 * setting up CAM interfaces and scanning for available LUNs to
1043 * be used for SCSI device peripherals.
1044 *
1045 * @param a device
1046 * @returns 0 on success or an error on failure
1047 */
1048static int
1049storvsc_attach(device_t dev)
1050{
1051	enum hv_storage_type stor_type;
1052	struct storvsc_softc *sc;
1053	struct cam_devq *devq;
1054	int ret, i, j;
1055	struct hv_storvsc_request *reqp;
1056	struct root_hold_token *root_mount_token = NULL;
1057	struct hv_sgl_node *sgl_node = NULL;
1058	void *tmp_buff = NULL;
1059
1060	/*
1061	 * We need to serialize storvsc attach calls.
1062	 */
1063	root_mount_token = root_mount_hold("storvsc");
1064
1065	sc = device_get_softc(dev);
1066	sc->hs_nchan = 1;
1067	sc->hs_chan = vmbus_get_channel(dev);
1068
1069	stor_type = storvsc_get_storage_type(dev);
1070
1071	if (stor_type == DRIVER_UNKNOWN) {
1072		ret = ENODEV;
1073		goto cleanup;
1074	}
1075
1076	/* fill in driver specific properties */
1077	sc->hs_drv_props = &g_drv_props_table[stor_type];
1078	sc->hs_drv_props->drv_ringbuffer_size = hv_storvsc_ringbuffer_size;
1079	sc->hs_drv_props->drv_max_ios_per_target =
1080		MIN(STORVSC_MAX_IO, hv_storvsc_max_io);
1081	if (bootverbose) {
1082		printf("storvsc ringbuffer size: %d, max_io: %d\n",
1083			sc->hs_drv_props->drv_ringbuffer_size,
1084			sc->hs_drv_props->drv_max_ios_per_target);
1085	}
1086	/* fill in device specific properties */
1087	sc->hs_unit	= device_get_unit(dev);
1088	sc->hs_dev	= dev;
1089
1090	mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);
1091
1092	ret = storvsc_init_requests(dev);
1093	if (ret != 0)
1094		goto cleanup;
1095
1096	/* create sg-list page pool */
1097	if (FALSE == g_hv_sgl_page_pool.is_init) {
1098		g_hv_sgl_page_pool.is_init = TRUE;
1099		LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
1100		LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);
1101
1102		/*
1103		 * Pre-create SG list, each SG list with
1104		 * STORVSC_DATA_SEGCNT_MAX segments, each
1105		 * segment has one page buffer
1106		 */
1107		for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; i++) {
1108	        	sgl_node = malloc(sizeof(struct hv_sgl_node),
1109			    M_DEVBUF, M_WAITOK|M_ZERO);
1110
1111			sgl_node->sgl_data =
1112			    sglist_alloc(STORVSC_DATA_SEGCNT_MAX,
1113			    M_WAITOK|M_ZERO);
1114
1115			for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
1116				tmp_buff = malloc(PAGE_SIZE,
1117				    M_DEVBUF, M_WAITOK|M_ZERO);
1118
1119				sgl_node->sgl_data->sg_segs[j].ss_paddr =
1120				    (vm_paddr_t)tmp_buff;
1121			}
1122
1123			LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
1124			    sgl_node, link);
1125		}
1126	}
1127
1128	sc->hs_destroy = FALSE;
1129	sc->hs_drain_notify = FALSE;
1130	sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
1131
1132	ret = hv_storvsc_connect_vsp(sc);
1133	if (ret != 0) {
1134		goto cleanup;
1135	}
1136
1137	/* Construct cpu to channel mapping */
1138	storvsc_create_chan_sel(sc);
1139
1140	/*
1141	 * Create the device queue.
1142	 * Hyper-V maps each target to one SCSI HBA
1143	 */
1144	devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target);
1145	if (devq == NULL) {
1146		device_printf(dev, "Failed to alloc device queue\n");
1147		ret = ENOMEM;
1148		goto cleanup;
1149	}
1150
1151	sc->hs_sim = cam_sim_alloc(storvsc_action,
1152				storvsc_poll,
1153				sc->hs_drv_props->drv_name,
1154				sc,
1155				sc->hs_unit,
1156				&sc->hs_lock, 1,
1157				sc->hs_drv_props->drv_max_ios_per_target,
1158				devq);
1159
1160	if (sc->hs_sim == NULL) {
1161		device_printf(dev, "Failed to alloc sim\n");
1162		cam_simq_free(devq);
1163		ret = ENOMEM;
1164		goto cleanup;
1165	}
1166
1167	mtx_lock(&sc->hs_lock);
1168	/* bus_id is set to 0, need to get it from VMBUS channel query? */
1169	if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) {
1170		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1171		mtx_unlock(&sc->hs_lock);
1172		device_printf(dev, "Unable to register SCSI bus\n");
1173		ret = ENXIO;
1174		goto cleanup;
1175	}
1176
1177	if (xpt_create_path(&sc->hs_path, /*periph*/NULL,
1178		 cam_sim_path(sc->hs_sim),
1179		CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
1180		xpt_bus_deregister(cam_sim_path(sc->hs_sim));
1181		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1182		mtx_unlock(&sc->hs_lock);
1183		device_printf(dev, "Unable to create path\n");
1184		ret = ENXIO;
1185		goto cleanup;
1186	}
1187
1188	mtx_unlock(&sc->hs_lock);
1189
1190	storvsc_sysctl(dev);
1191
1192	root_mount_rel(root_mount_token);
1193	return (0);
1194
1195
1196cleanup:
1197	root_mount_rel(root_mount_token);
1198	while (!LIST_EMPTY(&sc->hs_free_list)) {
1199		reqp = LIST_FIRST(&sc->hs_free_list);
1200		LIST_REMOVE(reqp, link);
1201		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
1202		free(reqp, M_DEVBUF);
1203	}
1204
1205	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1206		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1207		LIST_REMOVE(sgl_node, link);
1208		for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
1209			if (NULL !=
1210			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
1211				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
1212			}
1213		}
1214		sglist_free(sgl_node->sgl_data);
1215		free(sgl_node, M_DEVBUF);
1216	}
1217
1218	return (ret);
1219}
1220
1221/**
1222 * @brief StorVSC device detach function
1223 *
1224 * This function is responsible for safely detaching a
1225 * StorVSC device.  This includes waiting for inbound responses
1226 * to complete and freeing associated per-device structures.
1227 *
1228 * @param dev a device
1229 * returns 0 on success
1230 */
1231static int
1232storvsc_detach(device_t dev)
1233{
1234	struct storvsc_softc *sc = device_get_softc(dev);
1235	struct hv_storvsc_request *reqp = NULL;
1236	struct hv_sgl_node *sgl_node = NULL;
1237	int j = 0;
1238
1239	sc->hs_destroy = TRUE;
1240
1241	/*
1242	 * At this point, all outbound traffic should be disabled. We
1243	 * only allow inbound traffic (responses) to proceed so that
1244	 * outstanding requests can be completed.
1245	 */
1246
1247	sc->hs_drain_notify = TRUE;
1248	sema_wait(&sc->hs_drain_sema);
1249	sc->hs_drain_notify = FALSE;
1250
1251	/*
1252	 * Since we have already drained, we don't need to busy wait.
1253	 * The call to close the channel will reset the callback
1254	 * under the protection of the incoming channel lock.
1255	 */
1256
1257	vmbus_chan_close(sc->hs_chan);
1258
1259	mtx_lock(&sc->hs_lock);
1260	while (!LIST_EMPTY(&sc->hs_free_list)) {
1261		reqp = LIST_FIRST(&sc->hs_free_list);
1262		LIST_REMOVE(reqp, link);
1263		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
1264		free(reqp, M_DEVBUF);
1265	}
1266	mtx_unlock(&sc->hs_lock);
1267
1268	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1269		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1270		LIST_REMOVE(sgl_node, link);
1271		for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++){
1272			if (NULL !=
1273			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
1274				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
1275			}
1276		}
1277		sglist_free(sgl_node->sgl_data);
1278		free(sgl_node, M_DEVBUF);
1279	}
1280
1281	return (0);
1282}
1283
#if HVS_TIMEOUT_TEST
/**
 * @brief unit test for timed out operations
 *
 * This function provides unit testing capability to simulate
 * timed out operations.  Recompilation with HV_TIMEOUT_TEST=1
 * is required.
 *
 * @param reqp pointer to a request structure
 * @param opcode SCSI operation being performed
 * @param wait if 1, wait for I/O to complete
 */
static void
storvsc_timeout_test(struct hv_storvsc_request *reqp,
		uint8_t opcode, int wait)
{
	int ret;
	union ccb *ccb = reqp->ccb;
	struct storvsc_softc *sc = reqp->softc;

	/*
	 * Fix: the CDB lives at vstor_packet.u.vm_srb.u.cdb (as filled
	 * in by create_storvsc_request()); the old
	 * "vstor_packet.vm_srb.cdb" spelling does not compile when
	 * HVS_TIMEOUT_TEST is enabled.
	 */
	if (reqp->vstor_packet.u.vm_srb.u.cdb[0] != opcode) {
		return;
	}

	if (wait) {
		mtx_lock(&reqp->event.mtx);
	}
	ret = hv_storvsc_io_request(sc, reqp);
	if (ret != 0) {
		if (wait) {
			mtx_unlock(&reqp->event.mtx);
		}
		printf("%s: io_request failed with %d.\n",
				__func__, ret);
		ccb->ccb_h.status = CAM_PROVIDE_FAIL;
		mtx_lock(&sc->hs_lock);
		storvsc_free_request(sc, reqp);
		xpt_done(ccb);
		mtx_unlock(&sc->hs_lock);
		return;
	}

	if (wait) {
		xpt_print(ccb->ccb_h.path,
				"%u: %s: waiting for IO return.\n",
				ticks, __func__);
		ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz);
		mtx_unlock(&reqp->event.mtx);
		xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n",
				ticks, __func__, (ret == 0)?
				"IO return detected" :
				"IO return not detected");
		/*
		 * Now both the timer handler and io done are running
		 * simultaneously. We want to confirm the io done always
		 * finishes after the timer handler exits. So reqp used by
		 * timer handler is not freed or stale. Do busy loop for
		 * another 1/10 second to make sure io done does
		 * wait for the timer handler to complete.
		 */
		DELAY(100*1000);
		mtx_lock(&sc->hs_lock);
		xpt_print(ccb->ccb_h.path,
				"%u: %s: finishing, queue frozen %d, "
				"ccb status 0x%x scsi_status 0x%x.\n",
				ticks, __func__, sc->hs_frozen,
				ccb->ccb_h.status,
				ccb->csio.scsi_status);
		mtx_unlock(&sc->hs_lock);
	}
}
#endif /* HVS_TIMEOUT_TEST */
1356
#ifdef notyet
/**
 * @brief timeout handler for requests
 *
 * This function is called as a result of a callout expiring.
 * On the first expiry it logs the stall and re-arms the callout for
 * one more timeout period; on a subsequent expiry it freezes the
 * CAM SIM queue (if not already frozen).
 *
 * NOTE(review): this handler is compiled out (#ifdef notyet), along
 * with the callout_reset_sbt() call that would arm it in
 * storvsc_action().
 *
 * @param arg pointer to a request
 */
static void
storvsc_timeout(void *arg)
{
	struct hv_storvsc_request *reqp = arg;
	struct storvsc_softc *sc = reqp->softc;
	union ccb *ccb = reqp->ccb;

	if (reqp->retries == 0) {
		/* First expiry: report and give the I/O one more period. */
		mtx_lock(&sc->hs_lock);
		xpt_print(ccb->ccb_h.path,
		    "%u: IO timed out (req=0x%p), wait for another %u secs.\n",
		    ticks, reqp, ccb->ccb_h.timeout / 1000);
		cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
		mtx_unlock(&sc->hs_lock);

		reqp->retries++;
		callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout,
		    0, storvsc_timeout, reqp, 0);
#if HVS_TIMEOUT_TEST
		storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0);
#endif
		return;
	}

	mtx_lock(&sc->hs_lock);
	xpt_print(ccb->ccb_h.path,
		"%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n",
		ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000,
		(sc->hs_frozen == 0)?
		"freezing the queue" : "the queue is already frozen");
	/* Freeze the SIM queue once so no further I/O is started. */
	if (sc->hs_frozen == 0) {
		sc->hs_frozen = 1;
		xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1);
	}
	mtx_unlock(&sc->hs_lock);

#if HVS_TIMEOUT_TEST
	storvsc_timeout_test(reqp, MODE_SELECT_10, 1);
#endif
}
#endif
1406
1407/**
1408 * @brief StorVSC device poll function
1409 *
1410 * This function is responsible for servicing requests when
1411 * interrupts are disabled (i.e when we are dumping core.)
1412 *
1413 * @param sim a pointer to a CAM SCSI interface module
1414 */
1415static void
1416storvsc_poll(struct cam_sim *sim)
1417{
1418	struct storvsc_softc *sc = cam_sim_softc(sim);
1419
1420	mtx_assert(&sc->hs_lock, MA_OWNED);
1421	mtx_unlock(&sc->hs_lock);
1422	hv_storvsc_on_channel_callback(sc->hs_chan, sc);
1423	mtx_lock(&sc->hs_lock);
1424}
1425
1426/**
1427 * @brief StorVSC device action function
1428 *
1429 * This function is responsible for handling SCSI operations which
1430 * are passed from the CAM layer.  The requests are in the form of
1431 * CAM control blocks which indicate the action being performed.
1432 * Not all actions require converting the request to a VSCSI protocol
1433 * message - these actions can be responded to by this driver.
1434 * Requests which are destined for a backend storage device are converted
1435 * to a VSCSI protocol message and sent on the channel connection associated
1436 * with this device.
1437 *
1438 * @param sim pointer to a CAM SCSI interface module
1439 * @param ccb pointer to a CAM control block
1440 */
1441static void
1442storvsc_action(struct cam_sim *sim, union ccb *ccb)
1443{
1444	struct storvsc_softc *sc = cam_sim_softc(sim);
1445	int res;
1446
1447	mtx_assert(&sc->hs_lock, MA_OWNED);
1448	switch (ccb->ccb_h.func_code) {
1449	case XPT_PATH_INQ: {
1450		struct ccb_pathinq *cpi = &ccb->cpi;
1451
1452		cpi->version_num = 1;
1453		cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE;
1454		cpi->target_sprt = 0;
1455		cpi->hba_misc = PIM_NOBUSRESET;
1456		if (hv_storvsc_use_pim_unmapped)
1457			cpi->hba_misc |= PIM_UNMAPPED;
1458		cpi->maxio = STORVSC_DATA_SIZE_MAX;
1459		cpi->hba_eng_cnt = 0;
1460		cpi->max_target = STORVSC_MAX_TARGETS;
1461		cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target;
1462		cpi->initiator_id = cpi->max_target;
1463		cpi->bus_id = cam_sim_bus(sim);
1464		cpi->base_transfer_speed = 300000;
1465		cpi->transport = XPORT_SAS;
1466		cpi->transport_version = 0;
1467		cpi->protocol = PROTO_SCSI;
1468		cpi->protocol_version = SCSI_REV_SPC2;
1469		strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
1470		strncpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN);
1471		strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
1472		cpi->unit_number = cam_sim_unit(sim);
1473
1474		ccb->ccb_h.status = CAM_REQ_CMP;
1475		xpt_done(ccb);
1476		return;
1477	}
1478	case XPT_GET_TRAN_SETTINGS: {
1479		struct  ccb_trans_settings *cts = &ccb->cts;
1480
1481		cts->transport = XPORT_SAS;
1482		cts->transport_version = 0;
1483		cts->protocol = PROTO_SCSI;
1484		cts->protocol_version = SCSI_REV_SPC2;
1485
1486		/* enable tag queuing and disconnected mode */
1487		cts->proto_specific.valid = CTS_SCSI_VALID_TQ;
1488		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
1489		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
1490		cts->xport_specific.valid = CTS_SPI_VALID_DISC;
1491		cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;
1492
1493		ccb->ccb_h.status = CAM_REQ_CMP;
1494		xpt_done(ccb);
1495		return;
1496	}
1497	case XPT_SET_TRAN_SETTINGS:	{
1498		ccb->ccb_h.status = CAM_REQ_CMP;
1499		xpt_done(ccb);
1500		return;
1501	}
1502	case XPT_CALC_GEOMETRY:{
1503		cam_calc_geometry(&ccb->ccg, 1);
1504		xpt_done(ccb);
1505		return;
1506	}
1507	case  XPT_RESET_BUS:
1508	case  XPT_RESET_DEV:{
1509#if HVS_HOST_RESET
1510		if ((res = hv_storvsc_host_reset(sc)) != 0) {
1511			xpt_print(ccb->ccb_h.path,
1512				"hv_storvsc_host_reset failed with %d\n", res);
1513			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1514			xpt_done(ccb);
1515			return;
1516		}
1517		ccb->ccb_h.status = CAM_REQ_CMP;
1518		xpt_done(ccb);
1519		return;
1520#else
1521		xpt_print(ccb->ccb_h.path,
1522				  "%s reset not supported.\n",
1523				  (ccb->ccb_h.func_code == XPT_RESET_BUS)?
1524				  "bus" : "dev");
1525		ccb->ccb_h.status = CAM_REQ_INVALID;
1526		xpt_done(ccb);
1527		return;
1528#endif	/* HVS_HOST_RESET */
1529	}
1530	case XPT_SCSI_IO:
1531	case XPT_IMMED_NOTIFY: {
1532		struct hv_storvsc_request *reqp = NULL;
1533		bus_dmamap_t dmap_saved;
1534
1535		if (ccb->csio.cdb_len == 0) {
1536			panic("cdl_len is 0\n");
1537		}
1538
1539		if (LIST_EMPTY(&sc->hs_free_list)) {
1540			ccb->ccb_h.status = CAM_REQUEUE_REQ;
1541			if (sc->hs_frozen == 0) {
1542				sc->hs_frozen = 1;
1543				xpt_freeze_simq(sim, /* count*/1);
1544			}
1545			xpt_done(ccb);
1546			return;
1547		}
1548
1549		reqp = LIST_FIRST(&sc->hs_free_list);
1550		LIST_REMOVE(reqp, link);
1551
1552		/* Save the data_dmap before reset request */
1553		dmap_saved = reqp->data_dmap;
1554
1555		/* XXX this is ugly */
1556		bzero(reqp, sizeof(struct hv_storvsc_request));
1557
1558		/* Restore necessary bits */
1559		reqp->data_dmap = dmap_saved;
1560		reqp->softc = sc;
1561
1562		ccb->ccb_h.status |= CAM_SIM_QUEUED;
1563		if ((res = create_storvsc_request(ccb, reqp)) != 0) {
1564			ccb->ccb_h.status = CAM_REQ_INVALID;
1565			xpt_done(ccb);
1566			return;
1567		}
1568
1569#ifdef notyet
1570		if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
1571			callout_init(&reqp->callout, 1);
1572			callout_reset_sbt(&reqp->callout,
1573			    SBT_1MS * ccb->ccb_h.timeout, 0,
1574			    storvsc_timeout, reqp, 0);
1575#if HVS_TIMEOUT_TEST
1576			cv_init(&reqp->event.cv, "storvsc timeout cv");
1577			mtx_init(&reqp->event.mtx, "storvsc timeout mutex",
1578					NULL, MTX_DEF);
1579			switch (reqp->vstor_packet.vm_srb.cdb[0]) {
1580				case MODE_SELECT_10:
1581				case SEND_DIAGNOSTIC:
1582					/* To have timer send the request. */
1583					return;
1584				default:
1585					break;
1586			}
1587#endif /* HVS_TIMEOUT_TEST */
1588		}
1589#endif
1590
1591		if ((res = hv_storvsc_io_request(sc, reqp)) != 0) {
1592			xpt_print(ccb->ccb_h.path,
1593				"hv_storvsc_io_request failed with %d\n", res);
1594			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1595			storvsc_free_request(sc, reqp);
1596			xpt_done(ccb);
1597			return;
1598		}
1599		return;
1600	}
1601
1602	default:
1603		ccb->ccb_h.status = CAM_REQ_INVALID;
1604		xpt_done(ccb);
1605		return;
1606	}
1607}
1608
1609/**
1610 * @brief destroy bounce buffer
1611 *
1612 * This function is responsible for destroy a Scatter/Gather list
1613 * that create by storvsc_create_bounce_buffer()
1614 *
1615 * @param sgl- the Scatter/Gather need be destroy
1616 * @param sg_count- page count of the SG list.
1617 *
1618 */
1619static void
1620storvsc_destroy_bounce_buffer(struct sglist *sgl)
1621{
1622	struct hv_sgl_node *sgl_node = NULL;
1623	if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) {
1624		printf("storvsc error: not enough in use sgl\n");
1625		return;
1626	}
1627	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
1628	LIST_REMOVE(sgl_node, link);
1629	sgl_node->sgl_data = sgl;
1630	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
1631}
1632
1633/**
1634 * @brief create bounce buffer
1635 *
1636 * This function is responsible for create a Scatter/Gather list,
1637 * which hold several pages that can be aligned with page size.
1638 *
1639 * @param seg_count- SG-list segments count
1640 * @param write - if WRITE_TYPE, set SG list page used size to 0,
1641 * otherwise set used size to page size.
1642 *
1643 * return NULL if create failed
1644 */
1645static struct sglist *
1646storvsc_create_bounce_buffer(uint16_t seg_count, int write)
1647{
1648	int i = 0;
1649	struct sglist *bounce_sgl = NULL;
1650	unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
1651	struct hv_sgl_node *sgl_node = NULL;
1652
1653	/* get struct sglist from free_sgl_list */
1654	if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1655		printf("storvsc error: not enough free sgl\n");
1656		return NULL;
1657	}
1658	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1659	LIST_REMOVE(sgl_node, link);
1660	bounce_sgl = sgl_node->sgl_data;
1661	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
1662
1663	bounce_sgl->sg_maxseg = seg_count;
1664
1665	if (write == WRITE_TYPE)
1666		bounce_sgl->sg_nseg = 0;
1667	else
1668		bounce_sgl->sg_nseg = seg_count;
1669
1670	for (i = 0; i < seg_count; i++)
1671	        bounce_sgl->sg_segs[i].ss_len = buf_len;
1672
1673	return bounce_sgl;
1674}
1675
1676/**
1677 * @brief copy data from SG list to bounce buffer
1678 *
1679 * This function is responsible for copy data from one SG list's segments
1680 * to another SG list which used as bounce buffer.
1681 *
1682 * @param bounce_sgl - the destination SG list
1683 * @param orig_sgl - the segment of the source SG list.
1684 * @param orig_sgl_count - the count of segments.
1685 * @param orig_sgl_count - indicate which segment need bounce buffer,
1686 *  set 1 means need.
1687 *
1688 */
1689static void
1690storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
1691			       bus_dma_segment_t *orig_sgl,
1692			       unsigned int orig_sgl_count,
1693			       uint64_t seg_bits)
1694{
1695	int src_sgl_idx = 0;
1696
1697	for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
1698		if (seg_bits & (1 << src_sgl_idx)) {
1699			memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr,
1700			    (void*)orig_sgl[src_sgl_idx].ds_addr,
1701			    orig_sgl[src_sgl_idx].ds_len);
1702
1703			bounce_sgl->sg_segs[src_sgl_idx].ss_len =
1704			    orig_sgl[src_sgl_idx].ds_len;
1705		}
1706	}
1707}
1708
1709/**
1710 * @brief copy data from SG list which used as bounce to another SG list
1711 *
1712 * This function is responsible for copy data from one SG list with bounce
1713 * buffer to another SG list's segments.
1714 *
1715 * @param dest_sgl - the destination SG list's segments
1716 * @param dest_sgl_count - the count of destination SG list's segment.
1717 * @param src_sgl - the source SG list.
1718 * @param seg_bits - indicate which segment used bounce buffer of src SG-list.
1719 *
1720 */
1721void
1722storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
1723				    unsigned int dest_sgl_count,
1724				    struct sglist* src_sgl,
1725				    uint64_t seg_bits)
1726{
1727	int sgl_idx = 0;
1728
1729	for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
1730		if (seg_bits & (1 << sgl_idx)) {
1731			memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
1732			    (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr),
1733			    src_sgl->sg_segs[sgl_idx].ss_len);
1734		}
1735	}
1736}
1737
1738/**
1739 * @brief check SG list with bounce buffer or not
1740 *
1741 * This function is responsible for check if need bounce buffer for SG list.
1742 *
1743 * @param sgl - the SG list's segments
1744 * @param sg_count - the count of SG list's segment.
1745 * @param bits - segmengs number that need bounce buffer
1746 *
1747 * return -1 if SG list needless bounce buffer
1748 */
1749static int
1750storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
1751				unsigned int sg_count,
1752				uint64_t *bits)
1753{
1754	int i = 0;
1755	int offset = 0;
1756	uint64_t phys_addr = 0;
1757	uint64_t tmp_bits = 0;
1758	boolean_t found_hole = FALSE;
1759	boolean_t pre_aligned = TRUE;
1760
1761	if (sg_count < 2){
1762		return -1;
1763	}
1764
1765	*bits = 0;
1766
1767	phys_addr = vtophys(sgl[0].ds_addr);
1768	offset =  phys_addr - trunc_page(phys_addr);
1769
1770	if (offset != 0) {
1771		pre_aligned = FALSE;
1772		tmp_bits |= 1;
1773	}
1774
1775	for (i = 1; i < sg_count; i++) {
1776		phys_addr = vtophys(sgl[i].ds_addr);
1777		offset =  phys_addr - trunc_page(phys_addr);
1778
1779		if (offset == 0) {
1780			if (FALSE == pre_aligned){
1781				/*
1782				 * This segment is aligned, if the previous
1783				 * one is not aligned, find a hole
1784				 */
1785				found_hole = TRUE;
1786			}
1787			pre_aligned = TRUE;
1788		} else {
1789			tmp_bits |= 1 << i;
1790			if (!pre_aligned) {
1791				if (phys_addr != vtophys(sgl[i-1].ds_addr +
1792				    sgl[i-1].ds_len)) {
1793					/*
1794					 * Check whether connect to previous
1795					 * segment,if not, find the hole
1796					 */
1797					found_hole = TRUE;
1798				}
1799			} else {
1800				found_hole = TRUE;
1801			}
1802			pre_aligned = FALSE;
1803		}
1804	}
1805
1806	if (!found_hole) {
1807		return (-1);
1808	} else {
1809		*bits = tmp_bits;
1810		return 0;
1811	}
1812}
1813
1814/**
1815 * Copy bus_dma segments to multiple page buffer, which requires
1816 * the pages are compact composed except for the 1st and last pages.
1817 */
1818static void
1819storvsc_xferbuf_prepare(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
1820{
1821	struct hv_storvsc_request *reqp = arg;
1822	union ccb *ccb = reqp->ccb;
1823	struct ccb_scsiio *csio = &ccb->csio;
1824	struct storvsc_gpa_range *prplist;
1825	int i;
1826
1827	prplist = &reqp->prp_list;
1828	prplist->gpa_range.gpa_len = csio->dxfer_len;
1829	prplist->gpa_range.gpa_ofs = segs[0].ds_addr & PAGE_MASK;
1830
1831	for (i = 0; i < nsegs; i++) {
1832#ifdef INVARIANTS
1833		if (nsegs > 1) {
1834			if (i == 0) {
1835				KASSERT((segs[i].ds_addr & PAGE_MASK) +
1836				    segs[i].ds_len == PAGE_SIZE,
1837				    ("invalid 1st page, ofs 0x%jx, len %zu",
1838				     (uintmax_t)segs[i].ds_addr,
1839				     segs[i].ds_len));
1840			} else if (i == nsegs - 1) {
1841				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0,
1842				    ("invalid last page, ofs 0x%jx",
1843				     (uintmax_t)segs[i].ds_addr));
1844			} else {
1845				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0 &&
1846				    segs[i].ds_len == PAGE_SIZE,
1847				    ("not a full page, ofs 0x%jx, len %zu",
1848				     (uintmax_t)segs[i].ds_addr,
1849				     segs[i].ds_len));
1850			}
1851		}
1852#endif
1853		prplist->gpa_page[i] = atop(segs[i].ds_addr);
1854	}
1855	reqp->prp_cnt = nsegs;
1856}
1857
1858/**
1859 * @brief Fill in a request structure based on a CAM control block
1860 *
1861 * Fills in a request structure based on the contents of a CAM control
1862 * block.  The request structure holds the payload information for
1863 * VSCSI protocol request.
1864 *
1865 * @param ccb pointer to a CAM contorl block
1866 * @param reqp pointer to a request structure
1867 */
1868static int
1869create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
1870{
1871	struct ccb_scsiio *csio = &ccb->csio;
1872	uint64_t phys_addr;
1873	uint32_t pfn;
1874	uint64_t not_aligned_seg_bits = 0;
1875	int error;
1876
1877	/* refer to struct vmscsi_req for meanings of these two fields */
1878	reqp->vstor_packet.u.vm_srb.port =
1879		cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
1880	reqp->vstor_packet.u.vm_srb.path_id =
1881		cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));
1882
1883	reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id;
1884	reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun;
1885
1886	reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len;
1887	if(ccb->ccb_h.flags & CAM_CDB_POINTER) {
1888		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr,
1889			csio->cdb_len);
1890	} else {
1891		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes,
1892			csio->cdb_len);
1893	}
1894
1895	if (hv_storvsc_use_win8ext_flags) {
1896		reqp->vstor_packet.u.vm_srb.win8_extension.time_out_value = 60;
1897		reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1898			SRB_FLAGS_DISABLE_SYNCH_TRANSFER;
1899	}
1900	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
1901	case CAM_DIR_OUT:
1902		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
1903		if (hv_storvsc_use_win8ext_flags) {
1904			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1905				SRB_FLAGS_DATA_OUT;
1906		}
1907		break;
1908	case CAM_DIR_IN:
1909		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
1910		if (hv_storvsc_use_win8ext_flags) {
1911			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1912				SRB_FLAGS_DATA_IN;
1913		}
1914		break;
1915	case CAM_DIR_NONE:
1916		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
1917		if (hv_storvsc_use_win8ext_flags) {
1918			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1919				SRB_FLAGS_NO_DATA_TRANSFER;
1920		}
1921		break;
1922	default:
1923		printf("Error: unexpected data direction: 0x%x\n",
1924			ccb->ccb_h.flags & CAM_DIR_MASK);
1925		return (EINVAL);
1926	}
1927
1928	reqp->sense_data     = &csio->sense_data;
1929	reqp->sense_info_len = csio->sense_len;
1930
1931	reqp->ccb = ccb;
1932
1933	if (0 == csio->dxfer_len) {
1934		return (0);
1935	}
1936
1937	switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
1938	case CAM_DATA_BIO:
1939	case CAM_DATA_VADDR:
1940		error = bus_dmamap_load_ccb(reqp->softc->storvsc_req_dtag,
1941		    reqp->data_dmap, ccb, storvsc_xferbuf_prepare, reqp,
1942		    BUS_DMA_NOWAIT);
1943		if (error) {
1944			xpt_print(ccb->ccb_h.path,
1945			    "bus_dmamap_load_ccb failed: %d\n", error);
1946			return (error);
1947		}
1948		if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
1949			reqp->softc->sysctl_data.data_bio_cnt++;
1950		else
1951			reqp->softc->sysctl_data.data_vaddr_cnt++;
1952		break;
1953
1954	case CAM_DATA_SG:
1955	{
1956		struct storvsc_gpa_range *prplist;
1957		int i = 0;
1958		int offset = 0;
1959		int ret;
1960
1961		bus_dma_segment_t *storvsc_sglist =
1962		    (bus_dma_segment_t *)ccb->csio.data_ptr;
1963		u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
1964
1965		prplist = &reqp->prp_list;
1966		prplist->gpa_range.gpa_len = csio->dxfer_len;
1967
1968		printf("Storvsc: get SG I/O operation, %d\n",
1969		    reqp->vstor_packet.u.vm_srb.data_in);
1970
1971		if (storvsc_sg_count > STORVSC_DATA_SEGCNT_MAX){
1972			printf("Storvsc: %d segments is too much, "
1973			    "only support %d segments\n",
1974			    storvsc_sg_count, STORVSC_DATA_SEGCNT_MAX);
1975			return (EINVAL);
1976		}
1977
1978		/*
1979		 * We create our own bounce buffer function currently. Idealy
1980		 * we should use BUS_DMA(9) framework. But with current BUS_DMA
1981		 * code there is no callback API to check the page alignment of
1982		 * middle segments before busdma can decide if a bounce buffer
1983		 * is needed for particular segment. There is callback,
1984		 * "bus_dma_filter_t *filter", but the parrameters are not
1985		 * sufficient for storvsc driver.
1986		 * TODO:
1987		 *	Add page alignment check in BUS_DMA(9) callback. Once
1988		 *	this is complete, switch the following code to use
1989		 *	BUS_DMA(9) for storvsc bounce buffer support.
1990		 */
1991		/* check if we need to create bounce buffer */
1992		ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
1993		    storvsc_sg_count, &not_aligned_seg_bits);
1994		if (ret != -1) {
1995			reqp->bounce_sgl =
1996			    storvsc_create_bounce_buffer(storvsc_sg_count,
1997			    reqp->vstor_packet.u.vm_srb.data_in);
1998			if (NULL == reqp->bounce_sgl) {
1999				printf("Storvsc_error: "
2000				    "create bounce buffer failed.\n");
2001				return (ENOMEM);
2002			}
2003
2004			reqp->bounce_sgl_count = storvsc_sg_count;
2005			reqp->not_aligned_seg_bits = not_aligned_seg_bits;
2006
2007			/*
2008			 * if it is write, we need copy the original data
2009			 *to bounce buffer
2010			 */
2011			if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
2012				storvsc_copy_sgl_to_bounce_buf(
2013				    reqp->bounce_sgl,
2014				    storvsc_sglist,
2015				    storvsc_sg_count,
2016				    reqp->not_aligned_seg_bits);
2017			}
2018
2019			/* transfer virtual address to physical frame number */
2020			if (reqp->not_aligned_seg_bits & 0x1){
2021 				phys_addr =
2022				    vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr);
2023			}else{
2024 				phys_addr =
2025					vtophys(storvsc_sglist[0].ds_addr);
2026			}
2027			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
2028
2029			pfn = phys_addr >> PAGE_SHIFT;
2030			prplist->gpa_page[0] = pfn;
2031
2032			for (i = 1; i < storvsc_sg_count; i++) {
2033				if (reqp->not_aligned_seg_bits & (1 << i)) {
2034					phys_addr =
2035					    vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr);
2036				} else {
2037					phys_addr =
2038					    vtophys(storvsc_sglist[i].ds_addr);
2039				}
2040
2041				pfn = phys_addr >> PAGE_SHIFT;
2042				prplist->gpa_page[i] = pfn;
2043			}
2044			reqp->prp_cnt = i;
2045		} else {
2046			phys_addr = vtophys(storvsc_sglist[0].ds_addr);
2047
2048			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
2049
2050			for (i = 0; i < storvsc_sg_count; i++) {
2051				phys_addr = vtophys(storvsc_sglist[i].ds_addr);
2052				pfn = phys_addr >> PAGE_SHIFT;
2053				prplist->gpa_page[i] = pfn;
2054			}
2055			reqp->prp_cnt = i;
2056
2057			/* check the last segment cross boundary or not */
2058			offset = phys_addr & PAGE_MASK;
2059			if (offset) {
2060				/* Add one more PRP entry */
2061				phys_addr =
2062				    vtophys(storvsc_sglist[i-1].ds_addr +
2063				    PAGE_SIZE - offset);
2064				pfn = phys_addr >> PAGE_SHIFT;
2065				prplist->gpa_page[i] = pfn;
2066				reqp->prp_cnt++;
2067			}
2068
2069			reqp->bounce_sgl_count = 0;
2070		}
2071		reqp->softc->sysctl_data.data_sg_cnt++;
2072		break;
2073	}
2074	default:
2075		printf("Unknow flags: %d\n", ccb->ccb_h.flags);
2076		return(EINVAL);
2077	}
2078
2079	return(0);
2080}
2081
2082/**
2083 * @brief completion function before returning to CAM
2084 *
2085 * I/O process has been completed and the result needs
2086 * to be passed to the CAM layer.
2087 * Free resources related to this request.
2088 *
2089 * @param reqp pointer to a request structure
2090 */
static void
storvsc_io_done(struct hv_storvsc_request *reqp)
{
	union ccb *ccb = reqp->ccb;
	struct ccb_scsiio *csio = &ccb->csio;
	struct storvsc_softc *sc = reqp->softc;
	struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
	bus_dma_segment_t *ori_sglist = NULL;
	int ori_sg_count = 0;
	/* destroy bounce buffer if it is used */
	if (reqp->bounce_sgl_count) {
		ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
		ori_sg_count = ccb->csio.sglist_cnt;

		/*
		 * If it is READ operation, we should copy back the data
		 * to original SG list.
		 */
		if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
			storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
			    ori_sg_count,
			    reqp->bounce_sgl,
			    reqp->not_aligned_seg_bits);
		}

		storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
		reqp->bounce_sgl_count = 0;
	}

	/*
	 * This request had timed out earlier and was retried; note its
	 * late completion and clear the retry counter.
	 */
	if (reqp->retries > 0) {
		mtx_lock(&sc->hs_lock);
#if HVS_TIMEOUT_TEST
		xpt_print(ccb->ccb_h.path,
			"%u: IO returned after timeout, "
			"waking up timer handler if any.\n", ticks);
		mtx_lock(&reqp->event.mtx);
		cv_signal(&reqp->event.cv);
		mtx_unlock(&reqp->event.mtx);
#endif
		reqp->retries = 0;
		xpt_print(ccb->ccb_h.path,
			"%u: IO returned after timeout, "
			"stopping timer if any.\n", ticks);
		mtx_unlock(&sc->hs_lock);
	}

#ifdef notyet
	/*
	 * callout_drain() will wait for the timer handler to finish
	 * if it is running. So we don't need any lock to synchronize
	 * between this routine and the timer handler.
	 * Note that we need to make sure reqp is not freed when timer
	 * handler is using or will use it.
	 */
	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
		callout_drain(&reqp->callout);
	}
#endif

	/*
	 * The request is leaving the SIM queue; clear the previous CAM
	 * status and translate the host-reported SRB/SCSI status into
	 * CAM status bits below.
	 */
	ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
	ccb->ccb_h.status &= ~CAM_STATUS_MASK;
	if (vm_srb->scsi_status == SCSI_STATUS_OK) {
		const struct scsi_generic *cmd;
		cmd = (const struct scsi_generic *)
		    ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
		     csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
		if (vm_srb->srb_status != SRB_STATUS_SUCCESS) {
			/*
			 * If there are errors, for example, invalid LUN,
			 * host will inform VM through SRB status.
			 */
			if (bootverbose) {
				if (vm_srb->srb_status == SRB_STATUS_INVALID_LUN) {
					xpt_print(ccb->ccb_h.path,
					    "invalid LUN %d for op: %s\n",
					    vm_srb->lun,
					    scsi_op_desc(cmd->opcode, NULL));
				} else {
					xpt_print(ccb->ccb_h.path,
					    "Unknown SRB flag: %d for op: %s\n",
					    vm_srb->srb_status,
					    scsi_op_desc(cmd->opcode, NULL));
				}
			}

			/*
			 * XXX For a selection timeout, all of the LUNs
			 * on the target will be gone.  It works for SCSI
			 * disks, but does not work for IDE disks.
			 *
			 * For CAM_DEV_NOT_THERE, CAM will only get
			 * rid of the device(s) specified by the path.
			 */
			if (storvsc_get_storage_type(sc->hs_dev) ==
			    DRIVER_STORVSC)
				ccb->ccb_h.status |= CAM_SEL_TIMEOUT;
			else
				ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
		} else {
			ccb->ccb_h.status |= CAM_REQ_CMP;
		}

		/*
		 * Post-process INQUIRY responses: log them in verbose
		 * mode and, for Microsoft-hosted devices, possibly
		 * upgrade the reported SCSI revision (see below).
		 */
		if (cmd->opcode == INQUIRY) {
			struct scsi_inquiry_data *inq_data =
			    (struct scsi_inquiry_data *)csio->data_ptr;
			uint8_t *resp_buf = (uint8_t *)csio->data_ptr;
			int resp_xfer_len, resp_buf_len, data_len;

			/* Get the buffer length reported by host */
			resp_xfer_len = vm_srb->transfer_len;
			/* Get the available buffer length */
			resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
			data_len = (resp_buf_len < resp_xfer_len) ?
			    resp_buf_len : resp_xfer_len;

			if (bootverbose && data_len >= 5) {
				xpt_print(ccb->ccb_h.path, "storvsc inquiry "
				    "(%d) [%x %x %x %x %x ... ]\n", data_len,
				    resp_buf[0], resp_buf[1], resp_buf[2],
				    resp_buf[3], resp_buf[4]);
			}
			if (vm_srb->srb_status == SRB_STATUS_SUCCESS &&
			    data_len >= SHORT_INQUIRY_LENGTH) {
				char vendor[16];

				cam_strvis(vendor, inq_data->vendor,
				    sizeof(inq_data->vendor), sizeof(vendor));

				/*
				 * XXX: Upgrade SPC2 to SPC3 if host is WIN8 or
				 * WIN2012 R2 in order to support UNMAP feature.
				 */
				if (!strncmp(vendor, "Msft", 4) &&
				    SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
				    (vmstor_proto_version ==
				     VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
				     vmstor_proto_version ==
				     VMSTOR_PROTOCOL_VERSION_WIN8)) {
					inq_data->version = SCSI_REV_SPC3;
					if (bootverbose) {
						xpt_print(ccb->ccb_h.path,
						    "storvsc upgrades "
						    "SPC2 to SPC3\n");
					}
				}
			}
		}
	} else {
		/* Non-OK SCSI status from the host; log and flag it. */
		mtx_lock(&sc->hs_lock);
		xpt_print(ccb->ccb_h.path,
			"storvsc scsi_status = %d\n",
			vm_srb->scsi_status);
		mtx_unlock(&sc->hs_lock);
		ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
	}

	/* Residual = bytes requested minus bytes the host transferred. */
	ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF);
	ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len;

	/* Mark autosense data valid if the host returned sense info. */
	if (reqp->sense_info_len != 0) {
		csio->sense_resid = csio->sense_len - reqp->sense_info_len;
		ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
	}

	/*
	 * If the softc was frozen (queue full), ask CAM to release the
	 * SIM queue now that a request slot is being freed.
	 */
	mtx_lock(&sc->hs_lock);
	if (reqp->softc->hs_frozen == 1) {
		xpt_print(ccb->ccb_h.path,
			"%u: storvsc unfreezing softc 0x%p.\n",
			ticks, reqp->softc);
		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
		reqp->softc->hs_frozen = 0;
	}
	storvsc_free_request(sc, reqp);
	mtx_unlock(&sc->hs_lock);

	/* Complete the CCB directly, bypassing the CAM done queue. */
	xpt_done_direct(ccb);
}
2268
2269/**
2270 * @brief Free a request structure
2271 *
2272 * Free a request structure by returning it to the free list
2273 *
2274 * @param sc pointer to a softc
2275 * @param reqp pointer to a request structure
2276 */
static void
storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp)
{

	/*
	 * NOTE(review): the caller visible in this file (storvsc_io_done)
	 * holds sc->hs_lock around this call, which appears to protect
	 * hs_free_list — confirm before adding an unlocked caller.
	 */
	LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
}
2283
2284/**
2285 * @brief Determine type of storage device from GUID
2286 *
 * Using the type GUID, determine if this is a StorVSC (paravirtual
 * SCSI) or BlkVSC (paravirtual IDE) device.
 *
 * @param dev a device
 * @returns an enum hv_storage_type identifying the device
2292 */
2293static enum hv_storage_type
2294storvsc_get_storage_type(device_t dev)
2295{
2296	device_t parent = device_get_parent(dev);
2297
2298	if (VMBUS_PROBE_GUID(parent, dev, &gBlkVscDeviceType) == 0)
2299		return DRIVER_BLKVSC;
2300	if (VMBUS_PROBE_GUID(parent, dev, &gStorVscDeviceType) == 0)
2301		return DRIVER_STORVSC;
2302	return DRIVER_UNKNOWN;
2303}
2304
2305#define	PCI_VENDOR_INTEL	0x8086
2306#define	PCI_PRODUCT_PIIX4	0x7111
2307
2308static void
2309storvsc_ada_probe_veto(void *arg __unused, struct cam_path *path,
2310    struct ata_params *ident_buf __unused, int *veto)
2311{
2312
2313	/*
2314	 * The ATA disks are shared with the controllers managed
2315	 * by this driver, so veto the ATA disks' attachment; the
2316	 * ATA disks will be attached as SCSI disks once this driver
2317	 * attached.
2318	 */
2319	if (path->device->protocol == PROTO_ATA) {
2320		struct ccb_pathinq cpi;
2321
2322		bzero(&cpi, sizeof(cpi));
2323		xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NONE);
2324		cpi.ccb_h.func_code = XPT_PATH_INQ;
2325		xpt_action((union ccb *)&cpi);
2326		if (cpi.ccb_h.status == CAM_REQ_CMP &&
2327		    cpi.hba_vendor == PCI_VENDOR_INTEL &&
2328		    cpi.hba_device == PCI_PRODUCT_PIIX4) {
2329			(*veto)++;
2330			if (bootverbose) {
2331				xpt_print(path,
2332				    "Disable ATA disks on "
2333				    "simulated ATA controller (0x%04x%04x)\n",
2334				    cpi.hba_device, cpi.hba_vendor);
2335			}
2336		}
2337	}
2338}
2339
2340static void
2341storvsc_sysinit(void *arg __unused)
2342{
2343	if (vm_guest == VM_GUEST_HV) {
2344		storvsc_handler_tag = EVENTHANDLER_REGISTER(ada_probe_veto,
2345		    storvsc_ada_probe_veto, NULL, EVENTHANDLER_PRI_ANY);
2346	}
2347}
2348SYSINIT(storvsc_sys_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, storvsc_sysinit,
2349    NULL);
2350
2351static void
2352storvsc_sysuninit(void *arg __unused)
2353{
2354	if (storvsc_handler_tag != NULL)
2355		EVENTHANDLER_DEREGISTER(ada_probe_veto, storvsc_handler_tag);
2356}
2357SYSUNINIT(storvsc_sys_uninit, SI_SUB_DRIVERS, SI_ORDER_SECOND,
2358    storvsc_sysuninit, NULL);
2359