1/*-
2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2010-2012 Citrix Inc.
4 * Copyright (c) 2012 NetApp Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD$
29 */
30
31/**
32 * HyperV vmbus network VSC (virtual services client) module
33 *
34 */
35
36
37#include <sys/param.h>
38#include <sys/kernel.h>
39#include <sys/socket.h>
40#include <sys/lock.h>
41#include <net/if.h>
42#include <net/if_arp.h>
43#include <machine/bus.h>
44#include <machine/atomic.h>
45
46#include <dev/hyperv/include/hyperv.h>
47#include "hv_net_vsc.h"
48#include "hv_rndis.h"
49#include "hv_rndis_filter.h"
50
51
52/*
53 * Forward declarations
54 */
55static void hv_nv_on_channel_callback(void *context);
56static int  hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device);
57static int  hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device);
58static int  hv_nv_destroy_send_buffer(netvsc_dev *net_dev);
59static int  hv_nv_destroy_rx_buffer(netvsc_dev *net_dev);
60static int  hv_nv_connect_to_vsp(struct hv_device *device);
61static void hv_nv_on_send_completion(struct hv_device *device,
62				     hv_vm_packet_descriptor *pkt);
63static void hv_nv_on_receive(struct hv_device *device,
64			     hv_vm_packet_descriptor *pkt);
65static void hv_nv_send_receive_completion(struct hv_device *device,
66					  uint64_t tid);
67
68
69/*
70 *
71 */
72static inline netvsc_dev *
73hv_nv_alloc_net_device(struct hv_device *device)
74{
75	netvsc_dev *net_dev;
76	hn_softc_t *sc = device_get_softc(device->device);
77
78	net_dev = malloc(sizeof(netvsc_dev), M_DEVBUF, M_NOWAIT | M_ZERO);
79	if (net_dev == NULL) {
80		return (NULL);
81	}
82
83	net_dev->dev = device;
84	net_dev->destroy = FALSE;
85	sc->net_dev = net_dev;
86
87	return (net_dev);
88}
89
90/*
91 *
92 */
93static inline netvsc_dev *
94hv_nv_get_outbound_net_device(struct hv_device *device)
95{
96	hn_softc_t *sc = device_get_softc(device->device);
97	netvsc_dev *net_dev = sc->net_dev;;
98
99	if ((net_dev != NULL) && net_dev->destroy) {
100		return (NULL);
101	}
102
103	return (net_dev);
104}
105
106/*
107 *
108 */
109static inline netvsc_dev *
110hv_nv_get_inbound_net_device(struct hv_device *device)
111{
112	hn_softc_t *sc = device_get_softc(device->device);
113	netvsc_dev *net_dev = sc->net_dev;;
114
115	if (net_dev == NULL) {
116		return (net_dev);
117	}
118	/*
119	 * When the device is being destroyed; we only
120	 * permit incoming packets if and only if there
121	 * are outstanding sends.
122	 */
123	if (net_dev->destroy && net_dev->num_outstanding_sends == 0) {
124		return (NULL);
125	}
126
127	return (net_dev);
128}
129
130/*
131 * Net VSC initialize receive buffer with net VSP
132 *
133 * Net VSP:  Network virtual services client, also known as the
134 *     Hyper-V extensible switch and the synthetic data path.
135 */
136static int
137hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device)
138{
139	netvsc_dev *net_dev;
140	nvsp_msg *init_pkt;
141	int ret = 0;
142
143	net_dev = hv_nv_get_outbound_net_device(device);
144	if (!net_dev) {
145		return (ENODEV);
146	}
147
148	net_dev->rx_buf = contigmalloc(net_dev->rx_buf_size, M_DEVBUF,
149	    M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
150	if (net_dev->rx_buf == NULL) {
151		ret = ENOMEM;
152		goto cleanup;
153	}
154
155	/*
156	 * Establish the GPADL handle for this buffer on this channel.
157	 * Note:  This call uses the vmbus connection rather than the
158	 * channel to establish the gpadl handle.
159	 * GPADL:  Guest physical address descriptor list.
160	 */
161	ret = hv_vmbus_channel_establish_gpadl(
162		device->channel, net_dev->rx_buf,
163		net_dev->rx_buf_size, &net_dev->rx_buf_gpadl_handle);
164	if (ret != 0) {
165		goto cleanup;
166	}
167
168	/* sema_wait(&ext->channel_init_sema); KYS CHECK */
169
170	/* Notify the NetVsp of the gpadl handle */
171	init_pkt = &net_dev->channel_init_packet;
172
173	memset(init_pkt, 0, sizeof(nvsp_msg));
174
175	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_rx_buf;
176	init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle =
177	    net_dev->rx_buf_gpadl_handle;
178	init_pkt->msgs.vers_1_msgs.send_rx_buf.id =
179	    NETVSC_RECEIVE_BUFFER_ID;
180
181	/* Send the gpadl notification request */
182
183	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
184	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
185	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
186	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
187	if (ret != 0) {
188		goto cleanup;
189	}
190
191	sema_wait(&net_dev->channel_init_sema);
192
193	/* Check the response */
194	if (init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.status
195	    != nvsp_status_success) {
196		ret = EINVAL;
197		goto cleanup;
198	}
199
200	net_dev->rx_section_count =
201	    init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections;
202
203	net_dev->rx_sections = malloc(net_dev->rx_section_count *
204	    sizeof(nvsp_1_rx_buf_section), M_DEVBUF, M_NOWAIT);
205	if (net_dev->rx_sections == NULL) {
206		ret = EINVAL;
207		goto cleanup;
208	}
209	memcpy(net_dev->rx_sections,
210	    init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections,
211	    net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section));
212
213
214	/*
215	 * For first release, there should only be 1 section that represents
216	 * the entire receive buffer
217	 */
218	if (net_dev->rx_section_count != 1
219	    || net_dev->rx_sections->offset != 0) {
220		ret = EINVAL;
221		goto cleanup;
222	}
223
224	goto exit;
225
226cleanup:
227	hv_nv_destroy_rx_buffer(net_dev);
228
229exit:
230	return (ret);
231}
232
233/*
234 * Net VSC initialize send buffer with net VSP
235 */
static int
hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device)
{
	netvsc_dev *net_dev;
	nvsp_msg *init_pkt;
	int ret = 0;

	net_dev = hv_nv_get_outbound_net_device(device);
	if (!net_dev) {
		return (ENODEV);
	}

	/* Physically contiguous, page-aligned, zeroed send buffer */
	net_dev->send_buf  = contigmalloc(net_dev->send_buf_size, M_DEVBUF,
	    M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
	if (net_dev->send_buf == NULL) {
		ret = ENOMEM;
		goto cleanup;
	}

	/*
	 * Establish the gpadl handle for this buffer on this channel.
	 * Note:  This call uses the vmbus connection rather than the
	 * channel to establish the gpadl handle.
	 */
	ret = hv_vmbus_channel_establish_gpadl(device->channel,
	    net_dev->send_buf, net_dev->send_buf_size,
	    &net_dev->send_buf_gpadl_handle);
	if (ret != 0) {
		goto cleanup;
	}

	/* Notify the NetVsp of the gpadl handle */

	init_pkt = &net_dev->channel_init_packet;

	memset(init_pkt, 0, sizeof(nvsp_msg));

	/*
	 * NOTE(review): the message type is send_send_buf, yet the fields
	 * are written through the send_rx_buf union member.  This appears
	 * to rely on both members having the same layout -- confirm
	 * against the nvsp_msg definition in hv_net_vsc.h.
	 */
	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_send_buf;
	init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle =
	    net_dev->send_buf_gpadl_handle;
	init_pkt->msgs.vers_1_msgs.send_rx_buf.id =
	    NETVSC_SEND_BUFFER_ID;

	/* Send the gpadl notification request */

	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0) {
		goto cleanup;
	}

	/* Wait for the host's send_send_buf_complete response */
	sema_wait(&net_dev->channel_init_sema);

	/* Check the response */
	if (init_pkt->msgs.vers_1_msgs.send_send_buf_complete.status
	    != nvsp_status_success) {
		ret = EINVAL;
		goto cleanup;
	}

	/* Section size is chosen by the host */
	net_dev->send_section_size =
	    init_pkt->msgs.vers_1_msgs.send_send_buf_complete.section_size;

	goto exit;

cleanup:
	hv_nv_destroy_send_buffer(net_dev);

exit:
	return (ret);
}
309
310/*
311 * Net VSC destroy receive buffer
312 */
static int
hv_nv_destroy_rx_buffer(netvsc_dev *net_dev)
{
	nvsp_msg *revoke_pkt;
	int ret = 0;

	/*
	 * Teardown order: revoke the buffer from the host, tear down the
	 * GPADL, then free guest memory.  Each step is guarded by the
	 * corresponding state field, so this function is safe to call from
	 * partially-completed init paths.
	 *
	 * If we got a section count, it means we received a
	 * send_rx_buf_complete msg
	 * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore,
	 * we need to send a revoke msg here
	 */
	if (net_dev->rx_section_count) {
		/* Send the revoke receive buffer */
		revoke_pkt = &net_dev->revoke_packet;
		memset(revoke_pkt, 0, sizeof(nvsp_msg));

		revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_rx_buf;
		revoke_pkt->msgs.vers_1_msgs.revoke_rx_buf.id =
		    NETVSC_RECEIVE_BUFFER_ID;

		ret = hv_vmbus_channel_send_packet(net_dev->dev->channel,
		    revoke_pkt, sizeof(nvsp_msg),
		    (uint64_t)(uintptr_t)revoke_pkt,
		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);

		/*
		 * If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			return (ret);
		}
	}

	/*
	 * Tear down the gpadl on the vsp end.
	 * ("gpdal" is the spelling used by the vmbus API itself.)
	 */
	if (net_dev->rx_buf_gpadl_handle) {
		ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel,
		    net_dev->rx_buf_gpadl_handle);
		/*
		 * If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			return (ret);
		}
		net_dev->rx_buf_gpadl_handle = 0;
	}

	if (net_dev->rx_buf) {
		/* Free up the receive buffer */
		contigfree(net_dev->rx_buf, net_dev->rx_buf_size, M_DEVBUF);
		net_dev->rx_buf = NULL;
	}

	if (net_dev->rx_sections) {
		free(net_dev->rx_sections, M_DEVBUF);
		net_dev->rx_sections = NULL;
		net_dev->rx_section_count = 0;
	}

	return (ret);
}
376
377/*
378 * Net VSC destroy send buffer
379 */
static int
hv_nv_destroy_send_buffer(netvsc_dev *net_dev)
{
	nvsp_msg *revoke_pkt;
	int ret = 0;

	/*
	 * Teardown order mirrors hv_nv_destroy_rx_buffer(): revoke, tear
	 * down the GPADL, then free guest memory.
	 *
	 * If we got a section size, it means we received a
	 * send_send_buf_complete msg
	 * (ie sent nvsp_msg_1_type_send_send_buf msg) therefore,
	 * we need to send a revoke msg here
	 */
	if (net_dev->send_section_size) {
		/* Send the revoke send buffer */
		revoke_pkt = &net_dev->revoke_packet;
		memset(revoke_pkt, 0, sizeof(nvsp_msg));

		revoke_pkt->hdr.msg_type =
		    nvsp_msg_1_type_revoke_send_buf;
		revoke_pkt->msgs.vers_1_msgs.revoke_send_buf.id =
		    NETVSC_SEND_BUFFER_ID;

		ret = hv_vmbus_channel_send_packet(net_dev->dev->channel,
		    revoke_pkt, sizeof(nvsp_msg),
		    (uint64_t)(uintptr_t)revoke_pkt,
		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
		/*
		 * If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			return (ret);
		}
	}

	/* Tear down the gpadl on the vsp end */
	if (net_dev->send_buf_gpadl_handle) {
		ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel,
		    net_dev->send_buf_gpadl_handle);

		/*
		 * If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			return (ret);
		}
		net_dev->send_buf_gpadl_handle = 0;
	}

	if (net_dev->send_buf) {
		/* Free up the send buffer */
		contigfree(net_dev->send_buf, net_dev->send_buf_size, M_DEVBUF);
		net_dev->send_buf = NULL;
	}

	return (ret);
}
438
439
440/*
441 * Attempt to negotiate the caller-specified NVSP version
442 *
443 * For NVSP v2, Server 2008 R2 does not set
444 * init_pkt->msgs.init_msgs.init_compl.negotiated_prot_vers
445 * to the negotiated version, so we cannot rely on that.
446 */
447static int
448hv_nv_negotiate_nvsp_protocol(struct hv_device *device, netvsc_dev *net_dev,
449			      uint32_t nvsp_ver)
450{
451	nvsp_msg *init_pkt;
452	int ret;
453
454	init_pkt = &net_dev->channel_init_packet;
455	memset(init_pkt, 0, sizeof(nvsp_msg));
456	init_pkt->hdr.msg_type = nvsp_msg_type_init;
457
458	/*
459	 * Specify parameter as the only acceptable protocol version
460	 */
461	init_pkt->msgs.init_msgs.init.p1.protocol_version = nvsp_ver;
462	init_pkt->msgs.init_msgs.init.protocol_version_2 = nvsp_ver;
463
464	/* Send the init request */
465	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
466	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
467	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
468	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
469	if (ret != 0)
470		return (-1);
471
472	sema_wait(&net_dev->channel_init_sema);
473
474	if (init_pkt->msgs.init_msgs.init_compl.status != nvsp_status_success)
475		return (EINVAL);
476
477	return (0);
478}
479
480/*
481 * Send NDIS version 2 config packet containing MTU.
482 *
483 * Not valid for NDIS version 1.
484 */
485static int
486hv_nv_send_ndis_config(struct hv_device *device, uint32_t mtu)
487{
488	netvsc_dev *net_dev;
489	nvsp_msg *init_pkt;
490	int ret;
491
492	net_dev = hv_nv_get_outbound_net_device(device);
493	if (!net_dev)
494		return (-ENODEV);
495
496	/*
497	 * Set up configuration packet, write MTU
498	 * Indicate we are capable of handling VLAN tags
499	 */
500	init_pkt = &net_dev->channel_init_packet;
501	memset(init_pkt, 0, sizeof(nvsp_msg));
502	init_pkt->hdr.msg_type = nvsp_msg_2_type_send_ndis_config;
503	init_pkt->msgs.vers_2_msgs.send_ndis_config.mtu = mtu;
504	init_pkt->
505		msgs.vers_2_msgs.send_ndis_config.capabilities.u1.u2.ieee8021q
506		= 1;
507
508	/* Send the configuration packet */
509	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
510	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
511	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
512	if (ret != 0)
513		return (-EINVAL);
514
515	return (0);
516}
517
518/*
519 * Net VSC connect to VSP
520 */
static int
hv_nv_connect_to_vsp(struct hv_device *device)
{
	netvsc_dev *net_dev;
	nvsp_msg *init_pkt;
	uint32_t nvsp_vers;
	uint32_t ndis_version;
	int ret = 0;
	device_t dev = device->device;
	hn_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp = sc->arpcom.ac_ifp;

	net_dev = hv_nv_get_outbound_net_device(device);
	if (!net_dev) {
		return (ENODEV);
	}

	/*
	 * Negotiate the NVSP version.  Try NVSP v2 first.
	 */
	nvsp_vers = NVSP_PROTOCOL_VERSION_2;
	ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers);
	if (ret != 0) {
		/* NVSP v2 failed, try NVSP v1 */
		nvsp_vers = NVSP_PROTOCOL_VERSION_1;
		ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers);
		if (ret != 0) {
			/* NVSP v1 failed, return bad status */
			return (ret);
		}
	}
	net_dev->nvsp_version = nvsp_vers;

	/*
	 * Set the MTU if supported by this NVSP protocol version
	 * This needs to be right after the NVSP init message per Haiyang
	 *
	 * NOTE(review): ret is assigned here but overwritten below without
	 * being checked -- presumably the MTU config is best-effort; confirm.
	 */
	if (nvsp_vers >= NVSP_PROTOCOL_VERSION_2)
		ret = hv_nv_send_ndis_config(device, ifp->if_mtu);

	/*
	 * Send the NDIS version
	 */
	init_pkt = &net_dev->channel_init_packet;

	memset(init_pkt, 0, sizeof(nvsp_msg));

	/*
	 * Updated to version 5.1, minimum, for VLAN per Haiyang
	 */
	ndis_version = NDIS_VERSION;

	/* Split the 32-bit NDIS_VERSION into major (high 16) / minor (low 16) */
	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_ndis_vers;
	init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_major_vers =
	    (ndis_version & 0xFFFF0000) >> 16;
	init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_minor_vers =
	    ndis_version & 0xFFFF;

	/* Send the init request */

	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
	if (ret != 0) {
		goto cleanup;
	}
	/*
	 * TODO:  BUGBUG - We have to wait for the above msg since the netvsp
	 * uses KMCL which acknowledges packet (completion packet)
	 * since our Vmbus always set the
	 * HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED flag
	 */
	/* sema_wait(&NetVscChannel->channel_init_sema); */

	/* Post the big receive buffer to NetVSP */
	ret = hv_nv_init_rx_buffer_with_net_vsp(device);
	if (ret == 0)
		ret = hv_nv_init_send_buffer_with_net_vsp(device);

cleanup:
	return (ret);
}
603
604/*
605 * Net VSC disconnect from VSP
606 */
static void
hv_nv_disconnect_from_vsp(netvsc_dev *net_dev)
{
	/* Revoke and free both host-shared buffers: receive first, then send */
	hv_nv_destroy_rx_buffer(net_dev);
	hv_nv_destroy_send_buffer(net_dev);
}
613
614/*
615 * Net VSC on device add
616 *
617 * Callback when the device belonging to this driver is added
618 */
netvsc_dev *
hv_nv_on_device_add(struct hv_device *device, void *additional_info)
{
	netvsc_dev *net_dev;
	netvsc_packet *packet;
	netvsc_packet *next_packet;
	int i, ret = 0;

	net_dev = hv_nv_alloc_net_device(device);
	if (!net_dev)
		goto cleanup;

	/* Initialize the NetVSC channel extension */
	net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
	/* Spin lock: the rx packet list is touched from the channel callback */
	mtx_init(&net_dev->rx_pkt_list_lock, "HV-RPL", NULL,
	    MTX_SPIN | MTX_RECURSE);

	net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE;

	/* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */
	STAILQ_INIT(&net_dev->myrx_packet_list);

	/*
	 * malloc a sufficient number of netvsc_packet buffers to hold
	 * a packet list.  Add them to the netvsc device packet queue.
	 * A partial allocation is tolerated: on failure we simply stop
	 * early and run with a shorter free list.
	 */
	for (i=0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) {
		packet = malloc(sizeof(netvsc_packet) +
		    (NETVSC_RECEIVE_SG_COUNT * sizeof(hv_vmbus_page_buffer)),
		    M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!packet) {
			break;
		}
		STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, packet,
		    mylist_entry);
	}

	/* Counting semaphore used to wait for channel-init responses */
	sema_init(&net_dev->channel_init_sema, 0, "netdev_sema");

	/*
	 * Open the channel
	 */
	ret = hv_vmbus_channel_open(device->channel,
	    NETVSC_DEVICE_RING_BUFFER_SIZE, NETVSC_DEVICE_RING_BUFFER_SIZE,
	    NULL, 0, hv_nv_on_channel_callback, device);
	if (ret != 0)
		goto cleanup;

	/*
	 * Connect with the NetVsp
	 */
	ret = hv_nv_connect_to_vsp(device);
	if (ret != 0)
		goto close;

	return (net_dev);

close:
	/* Now, we can close the channel safely */

	hv_vmbus_channel_close(device->channel);

cleanup:
	/*
	 * Free the packet buffers on the netvsc device packet queue.
	 * Release other resources.
	 *
	 * NOTE(review): hv_nv_alloc_net_device() stored net_dev in the
	 * softc; this path frees net_dev without clearing sc->net_dev,
	 * leaving a dangling pointer -- confirm no caller reads it after
	 * a failed add.
	 */
	if (net_dev) {
		sema_destroy(&net_dev->channel_init_sema);

		packet = STAILQ_FIRST(&net_dev->myrx_packet_list);
		while (packet != NULL) {
			next_packet = STAILQ_NEXT(packet, mylist_entry);
			free(packet, M_DEVBUF);
			packet = next_packet;
		}
		/* Reset the list to initial state */
		STAILQ_INIT(&net_dev->myrx_packet_list);

		mtx_destroy(&net_dev->rx_pkt_list_lock);

		free(net_dev, M_DEVBUF);
	}

	return (NULL);
}
705
706/*
707 * Net VSC on device remove
708 */
709int
710hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel)
711{
712	netvsc_packet *net_vsc_pkt;
713	netvsc_packet *next_net_vsc_pkt;
714	hn_softc_t *sc = device_get_softc(device->device);
715	netvsc_dev *net_dev = sc->net_dev;;
716
717	/* Stop outbound traffic ie sends and receives completions */
718	mtx_lock(&device->channel->inbound_lock);
719	net_dev->destroy = TRUE;
720	mtx_unlock(&device->channel->inbound_lock);
721
722	/* Wait for all send completions */
723	while (net_dev->num_outstanding_sends) {
724		DELAY(100);
725	}
726
727	hv_nv_disconnect_from_vsp(net_dev);
728
729	/* At this point, no one should be accessing net_dev except in here */
730
731	/* Now, we can close the channel safely */
732
733	if (!destroy_channel) {
734		device->channel->state =
735		    HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE;
736	}
737
738	hv_vmbus_channel_close(device->channel);
739
740	/* Release all resources */
741	net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list);
742	while (net_vsc_pkt != NULL) {
743		next_net_vsc_pkt = STAILQ_NEXT(net_vsc_pkt, mylist_entry);
744		free(net_vsc_pkt, M_DEVBUF);
745		net_vsc_pkt = next_net_vsc_pkt;
746	}
747
748	/* Reset the list to initial state */
749	STAILQ_INIT(&net_dev->myrx_packet_list);
750
751	mtx_destroy(&net_dev->rx_pkt_list_lock);
752	sema_destroy(&net_dev->channel_init_sema);
753	free(net_dev, M_DEVBUF);
754
755	return (0);
756}
757
758/*
759 * Net VSC on send completion
760 */
761static void
762hv_nv_on_send_completion(struct hv_device *device, hv_vm_packet_descriptor *pkt)
763{
764	netvsc_dev *net_dev;
765	nvsp_msg *nvsp_msg_pkt;
766	netvsc_packet *net_vsc_pkt;
767
768	net_dev = hv_nv_get_inbound_net_device(device);
769	if (!net_dev) {
770		return;
771	}
772
773	nvsp_msg_pkt =
774	    (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3));
775
776	if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_type_init_complete
777		|| nvsp_msg_pkt->hdr.msg_type
778			== nvsp_msg_1_type_send_rx_buf_complete
779		|| nvsp_msg_pkt->hdr.msg_type
780			== nvsp_msg_1_type_send_send_buf_complete) {
781		/* Copy the response back */
782		memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt,
783		    sizeof(nvsp_msg));
784		sema_post(&net_dev->channel_init_sema);
785	} else if (nvsp_msg_pkt->hdr.msg_type ==
786				   nvsp_msg_1_type_send_rndis_pkt_complete) {
787		/* Get the send context */
788		net_vsc_pkt =
789		    (netvsc_packet *)(unsigned long)pkt->transaction_id;
790
791		/* Notify the layer above us */
792		net_vsc_pkt->compl.send.on_send_completion(
793		    net_vsc_pkt->compl.send.send_completion_context);
794
795		atomic_subtract_int(&net_dev->num_outstanding_sends, 1);
796	}
797}
798
799/*
800 * Net VSC on send
801 * Sends a packet on the specified Hyper-V device.
802 * Returns 0 on success, non-zero on failure.
803 */
804int
805hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt)
806{
807	netvsc_dev *net_dev;
808	nvsp_msg send_msg;
809	int ret;
810
811	net_dev = hv_nv_get_outbound_net_device(device);
812	if (!net_dev)
813		return (ENODEV);
814
815	send_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt;
816	if (pkt->is_data_pkt) {
817		/* 0 is RMC_DATA */
818		send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 0;
819	} else {
820		/* 1 is RMC_CONTROL */
821		send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 1;
822	}
823
824	/* Not using send buffer section */
825	send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_idx =
826	    0xFFFFFFFF;
827	send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size = 0;
828
829	if (pkt->page_buf_count) {
830		ret = hv_vmbus_channel_send_packet_pagebuffer(device->channel,
831		    pkt->page_buffers, pkt->page_buf_count,
832		    &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt);
833	} else {
834		ret = hv_vmbus_channel_send_packet(device->channel,
835		    &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt,
836		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
837		    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
838	}
839
840	/* Record outstanding send only if send_packet() succeeded */
841	if (ret == 0)
842		atomic_add_int(&net_dev->num_outstanding_sends, 1);
843
844	return (ret);
845}
846
847/*
848 * Net VSC on receive
849 *
850 * In the FreeBSD Hyper-V virtual world, this function deals exclusively
851 * with virtual addresses.
852 */
static void
hv_nv_on_receive(struct hv_device *device, hv_vm_packet_descriptor *pkt)
{
	netvsc_dev *net_dev;
	hv_vm_transfer_page_packet_header *vm_xfer_page_pkt;
	nvsp_msg *nvsp_msg_pkt;
	netvsc_packet *net_vsc_pkt = NULL;
	unsigned long start;
	xfer_page_packet *xfer_page_pkt = NULL;
	STAILQ_HEAD(PKT_LIST, netvsc_packet_) mylist_head =
	    STAILQ_HEAD_INITIALIZER(mylist_head);
	int count = 0;
	int i = 0;

	net_dev = hv_nv_get_inbound_net_device(device);
	if (!net_dev)
		return;

	/*
	 * All inbound packets other than send completion should be
	 * xfer page packet.
	 */
	if (pkt->type != HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES)
		return;

	/* The nvsp message begins data_offset8 * 8 bytes into the packet */
	nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt
		+ (pkt->data_offset8 << 3));

	/* Make sure this is a valid nvsp packet */
	if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg_1_type_send_rndis_pkt)
		return;

	vm_xfer_page_pkt = (hv_vm_transfer_page_packet_header *)pkt;

	/* Only packets targeting our receive buffer are handled */
	if (vm_xfer_page_pkt->transfer_page_set_id
		!= NETVSC_RECEIVE_BUFFER_ID) {
		return;
	}

	STAILQ_INIT(&mylist_head);

	/*
	 * Grab free packets (range count + 1) to represent this xfer page
	 * packet.  +1 to represent the xfer page packet itself.  We grab it
	 * here so that we know exactly how many we can fulfill.
	 */
	mtx_lock_spin(&net_dev->rx_pkt_list_lock);
	while (!STAILQ_EMPTY(&net_dev->myrx_packet_list)) {
		net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list);
		STAILQ_REMOVE_HEAD(&net_dev->myrx_packet_list, mylist_entry);

		STAILQ_INSERT_TAIL(&mylist_head, net_vsc_pkt, mylist_entry);

		if (++count == vm_xfer_page_pkt->range_count + 1)
			break;
	}

	mtx_unlock_spin(&net_dev->rx_pkt_list_lock);

	/*
	 * We need at least 2 netvsc pkts (1 to represent the xfer page
	 * and at least 1 for the range) i.e. we can handle some of the
	 * xfer page packet ranges...
	 */
	if (count < 2) {
		/* Return netvsc packet to the freelist */
		mtx_lock_spin(&net_dev->rx_pkt_list_lock);
		for (i=count; i != 0; i--) {
			net_vsc_pkt = STAILQ_FIRST(&mylist_head);
			STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);

			STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list,
			    net_vsc_pkt, mylist_entry);
		}
		mtx_unlock_spin(&net_dev->rx_pkt_list_lock);

		/* Complete the transfer to the host; the data is dropped */
		hv_nv_send_receive_completion(device,
		    vm_xfer_page_pkt->d.transaction_id);

		return;
	}

	/* Take the first packet in the list */
	xfer_page_pkt = (xfer_page_packet *)STAILQ_FIRST(&mylist_head);
	STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);

	/* This is how many data packets we can supply */
	xfer_page_pkt->count = count - 1;

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i=0; i < (count - 1); i++) {
		net_vsc_pkt = STAILQ_FIRST(&mylist_head);
		STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);

		/*
		 * Initialize the netvsc packet
		 */
		net_vsc_pkt->xfer_page_pkt = xfer_page_pkt;
		net_vsc_pkt->compl.rx.rx_completion_context = net_vsc_pkt;
		net_vsc_pkt->device = device;
		/* Save this so that we can send it back */
		net_vsc_pkt->compl.rx.rx_completion_tid =
		    vm_xfer_page_pkt->d.transaction_id;

		net_vsc_pkt->tot_data_buf_len =
		    vm_xfer_page_pkt->ranges[i].byte_count;
		net_vsc_pkt->page_buf_count = 1;

		net_vsc_pkt->page_buffers[0].length =
		    vm_xfer_page_pkt->ranges[i].byte_count;

		/* The virtual address of the packet in the receive buffer */
		start = ((unsigned long)net_dev->rx_buf +
		    vm_xfer_page_pkt->ranges[i].byte_offset);
		start = ((unsigned long)start) & ~(PAGE_SIZE - 1);

		/* Page number of the virtual page containing packet start */
		net_vsc_pkt->page_buffers[0].pfn = start >> PAGE_SHIFT;

		/* Calculate the page relative offset */
		net_vsc_pkt->page_buffers[0].offset =
		    vm_xfer_page_pkt->ranges[i].byte_offset & (PAGE_SIZE - 1);

		/*
		 * In this implementation, we are dealing with virtual
		 * addresses exclusively.  Since we aren't using physical
		 * addresses at all, we don't care if a packet crosses a
		 * page boundary.  For this reason, the original code to
		 * check for and handle page crossings has been removed.
		 */

		/*
		 * Pass it to the upper layer.  The receive completion call
		 * has been moved into this function.
		 */
		hv_rf_on_receive(device, net_vsc_pkt);

		/*
		 * Moved completion call back here so that all received
		 * messages (not just data messages) will trigger a response
		 * message back to the host.
		 */
		hv_nv_on_receive_completion(net_vsc_pkt);
	}
}
998
999/*
1000 * Net VSC send receive completion
1001 */
1002static void
1003hv_nv_send_receive_completion(struct hv_device *device, uint64_t tid)
1004{
1005	nvsp_msg rx_comp_msg;
1006	int retries = 0;
1007	int ret = 0;
1008
1009	rx_comp_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt_complete;
1010
1011	/* Pass in the status */
1012	rx_comp_msg.msgs.vers_1_msgs.send_rndis_pkt_complete.status =
1013	    nvsp_status_success;
1014
1015retry_send_cmplt:
1016	/* Send the completion */
1017	ret = hv_vmbus_channel_send_packet(device->channel, &rx_comp_msg,
1018	    sizeof(nvsp_msg), tid, HV_VMBUS_PACKET_TYPE_COMPLETION, 0);
1019	if (ret == 0) {
1020		/* success */
1021		/* no-op */
1022	} else if (ret == EAGAIN) {
1023		/* no more room... wait a bit and attempt to retry 3 times */
1024		retries++;
1025
1026		if (retries < 4) {
1027			DELAY(100);
1028			goto retry_send_cmplt;
1029		}
1030	}
1031}
1032
1033/*
1034 * Net VSC on receive completion
1035 *
1036 * Send a receive completion packet to RNDIS device (ie NetVsp)
1037 */
void
hv_nv_on_receive_completion(void *context)
{
	netvsc_packet *packet = (netvsc_packet *)context;
	struct hv_device *device = (struct hv_device *)packet->device;
	netvsc_dev    *net_dev;
	uint64_t       tid = 0;
	boolean_t send_rx_completion = FALSE;

	/*
	 * Even though it seems logical to do a hv_nv_get_outbound_net_device()
	 * here to send out receive completion, we are using
	 * hv_nv_get_inbound_net_device() since we may have disabled
	 * outbound traffic already.
	 */
	net_dev = hv_nv_get_inbound_net_device(device);
	if (net_dev == NULL)
		return;

	/* Overloading use of the lock. */
	mtx_lock_spin(&net_dev->rx_pkt_list_lock);

	/* One fewer data packet outstanding for this xfer page packet */
	packet->xfer_page_pkt->count--;

	/*
	 * Last one in the line that represent 1 xfer page packet.
	 * Return the xfer page packet itself to the free list.
	 */
	if (packet->xfer_page_pkt->count == 0) {
		send_rx_completion = TRUE;
		tid = packet->compl.rx.rx_completion_tid;
		STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list,
		    (netvsc_packet *)(packet->xfer_page_pkt), mylist_entry);
	}

	/* Put the packet back on the free list */
	STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, packet, mylist_entry);
	mtx_unlock_spin(&net_dev->rx_pkt_list_lock);

	/* Send a receive completion for the xfer page packet */
	if (send_rx_completion)
		hv_nv_send_receive_completion(device, tid);
}
1081
1082/*
1083 * Net VSC on channel callback
1084 */
static void
hv_nv_on_channel_callback(void *context)
{
	/* Fixme:  Magic number */
	const int net_pkt_size = 2048;
	struct hv_device *device = (struct hv_device *)context;
	netvsc_dev *net_dev;
	uint32_t bytes_rxed;
	uint64_t request_id;
	uint8_t  *packet;
	hv_vm_packet_descriptor *desc;
	uint8_t *buffer;
	int     bufferlen = net_pkt_size;
	int     ret = 0;

	/* Working buffer; grown on demand when a packet doesn't fit */
	packet = malloc(net_pkt_size * sizeof(uint8_t), M_DEVBUF, M_NOWAIT);
	if (!packet)
		return;

	buffer = packet;

	net_dev = hv_nv_get_inbound_net_device(device);
	if (net_dev == NULL)
		goto out;

	/* Drain the channel until a zero-length read indicates it is empty */
	do {
		ret = hv_vmbus_channel_recv_packet_raw(device->channel,
		    buffer, bufferlen, &bytes_rxed, &request_id);
		if (ret == 0) {
			if (bytes_rxed > 0) {
				desc = (hv_vm_packet_descriptor *)buffer;
				switch (desc->type) {
				case HV_VMBUS_PACKET_TYPE_COMPLETION:
					hv_nv_on_send_completion(device, desc);
					break;
				case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
					hv_nv_on_receive(device, desc);
					break;
				default:
					/* Unknown packet types are dropped */
					break;
				}
			} else {
				break;
			}
		} else if (ret == ENOBUFS) {
			/* Handle large packet: re-allocate at the needed size */
			free(buffer, M_DEVBUF);
			buffer = malloc(bytes_rxed, M_DEVBUF, M_NOWAIT);
			if (buffer == NULL) {
				break;
			}
			bufferlen = bytes_rxed;
		}
		/*
		 * NOTE(review): any return value other than 0 or ENOBUFS
		 * falls through and retries indefinitely -- presumably
		 * recv_packet_raw only returns those two; confirm.
		 */
	} while (1);

out:
	/* buffer may be NULL here if the re-allocation failed; free(9)
	 * treats NULL as a no-op */
	free(buffer, M_DEVBUF);
}
1143
1144