1250199Sgrehan/*-
2250199Sgrehan * Copyright (c) 2010-2012 Citrix Inc.
3250199Sgrehan * Copyright (c) 2009-2012 Microsoft Corp.
4250199Sgrehan * Copyright (c) 2012 NetApp Inc.
5250199Sgrehan * All rights reserved.
6250199Sgrehan *
7250199Sgrehan * Redistribution and use in source and binary forms, with or without
8250199Sgrehan * modification, are permitted provided that the following conditions
9250199Sgrehan * are met:
10250199Sgrehan * 1. Redistributions of source code must retain the above copyright
11250199Sgrehan *    notice unmodified, this list of conditions, and the following
12250199Sgrehan *    disclaimer.
13250199Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
14250199Sgrehan *    notice, this list of conditions and the following disclaimer in the
15250199Sgrehan *    documentation and/or other materials provided with the distribution.
16250199Sgrehan *
17250199Sgrehan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18250199Sgrehan * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19250199Sgrehan * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20250199Sgrehan * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21250199Sgrehan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22250199Sgrehan * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23250199Sgrehan * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24250199Sgrehan * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25250199Sgrehan * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26250199Sgrehan * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27250199Sgrehan */
28250199Sgrehan
29250199Sgrehan/*-
30250199Sgrehan * Copyright (c) 2004-2006 Kip Macy
31250199Sgrehan * All rights reserved.
32250199Sgrehan *
33250199Sgrehan * Redistribution and use in source and binary forms, with or without
34250199Sgrehan * modification, are permitted provided that the following conditions
35250199Sgrehan * are met:
36250199Sgrehan * 1. Redistributions of source code must retain the above copyright
37250199Sgrehan *    notice, this list of conditions and the following disclaimer.
38250199Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
39250199Sgrehan *    notice, this list of conditions and the following disclaimer in the
40250199Sgrehan *    documentation and/or other materials provided with the distribution.
41250199Sgrehan *
42250199Sgrehan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43250199Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44250199Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45250199Sgrehan * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46250199Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47250199Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48250199Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49250199Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50250199Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51250199Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52250199Sgrehan * SUCH DAMAGE.
53250199Sgrehan */
54250199Sgrehan
55256363Sgrehan#include <sys/cdefs.h>
56256363Sgrehan__FBSDID("$FreeBSD: releng/10.3/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c 295948 2016-02-24 01:30:50Z sephe $");
57256363Sgrehan
58285236Swhu#include "opt_inet6.h"
59285236Swhu#include "opt_inet.h"
60285236Swhu
61250199Sgrehan#include <sys/param.h>
62250199Sgrehan#include <sys/systm.h>
63250199Sgrehan#include <sys/sockio.h>
64250199Sgrehan#include <sys/mbuf.h>
65250199Sgrehan#include <sys/malloc.h>
66250199Sgrehan#include <sys/module.h>
67250199Sgrehan#include <sys/kernel.h>
68250199Sgrehan#include <sys/socket.h>
69250199Sgrehan#include <sys/queue.h>
70250199Sgrehan#include <sys/lock.h>
71250199Sgrehan#include <sys/sx.h>
72295789Ssephe#include <sys/sysctl.h>
73250199Sgrehan
74250199Sgrehan#include <net/if.h>
75250199Sgrehan#include <net/if_arp.h>
76250199Sgrehan#include <net/ethernet.h>
77250199Sgrehan#include <net/if_dl.h>
78250199Sgrehan#include <net/if_media.h>
79250199Sgrehan
80250199Sgrehan#include <net/bpf.h>
81250199Sgrehan
82250199Sgrehan#include <net/if_types.h>
83250199Sgrehan#include <net/if_vlan_var.h>
84250199Sgrehan#include <net/if.h>
85250199Sgrehan
86250199Sgrehan#include <netinet/in_systm.h>
87250199Sgrehan#include <netinet/in.h>
88250199Sgrehan#include <netinet/ip.h>
89250199Sgrehan#include <netinet/if_ether.h>
90285236Swhu#include <netinet/tcp.h>
91285236Swhu#include <netinet/udp.h>
92285236Swhu#include <netinet/ip6.h>
93250199Sgrehan
94250199Sgrehan#include <vm/vm.h>
95250199Sgrehan#include <vm/vm_param.h>
96250199Sgrehan#include <vm/vm_kern.h>
97250199Sgrehan#include <vm/pmap.h>
98250199Sgrehan
99250199Sgrehan#include <machine/bus.h>
100250199Sgrehan#include <machine/resource.h>
101250199Sgrehan#include <machine/frame.h>
102250199Sgrehan#include <machine/vmparam.h>
103250199Sgrehan
104250199Sgrehan#include <sys/bus.h>
105250199Sgrehan#include <sys/rman.h>
106250199Sgrehan#include <sys/mutex.h>
107250199Sgrehan#include <sys/errno.h>
108250199Sgrehan#include <sys/types.h>
109250199Sgrehan#include <machine/atomic.h>
110250199Sgrehan
111250199Sgrehan#include <machine/intr_machdep.h>
112250199Sgrehan
113285236Swhu#include <machine/in_cksum.h>
114285236Swhu
115250199Sgrehan#include <dev/hyperv/include/hyperv.h>
116250199Sgrehan#include "hv_net_vsc.h"
117250199Sgrehan#include "hv_rndis.h"
118250199Sgrehan#include "hv_rndis_filter.h"
119250199Sgrehan
120250199Sgrehan
/* Short for Hyper-V network interface */
#define NETVSC_DEVNAME    "hn"

/*
 * It looks like offset 0 of buf is reserved to hold the softc pointer.
 * The sc pointer is evidently not needed, and is not presently populated.
 * The packet offset is where the netvsc_packet starts in the buffer.
 */
#define HV_NV_SC_PTR_OFFSET_IN_BUF         0
#define HV_NV_PACKET_OFFSET_IN_BUF         16

/*
 * TX descriptor count.
 * NOTE(review): presumably the number of hn_txdesc entries set up by
 * hn_create_tx_ring(); the user is outside this part of the file.
 */
/* YYY should get it from the underlying channel */
#define HN_TX_DESC_CNT			512

/*
 * Size of one RNDIS message: the rndis_msg itself plus room for the
 * VLAN, TSO and checksum per-packet-info blocks.
 */
#define HN_RNDIS_MSG_LEN		\
    (sizeof(rndis_msg) +		\
     RNDIS_VLAN_PPI_SIZE +		\
     RNDIS_TSO_PPI_SIZE +		\
     RNDIS_CSUM_PPI_SIZE)
/* Boundary and alignment values for the RNDIS message buffers. */
#define HN_RNDIS_MSG_BOUNDARY		PAGE_SIZE
#define HN_RNDIS_MSG_ALIGN		CACHE_LINE_SIZE

/*
 * Limits for TX packet data.  HN_TX_DATA_SEGCNT_MAX is the largest number
 * of data segments per packet: the per-packet page limit minus the page
 * buffers reserved for the TX path.
 */
#define HN_TX_DATA_BOUNDARY		PAGE_SIZE
#define HN_TX_DATA_MAXSIZE		IP_MAXPACKET
#define HN_TX_DATA_SEGSIZE		PAGE_SIZE
#define HN_TX_DATA_SEGCNT_MAX		\
    (NETVSC_PACKET_MAXPAGE - HV_RF_NUM_TX_RESERVED_PAGE_BUFS)
148295948Ssephe
/*
 * TX descriptor.  Descriptors are reference counted and, while unused,
 * are kept on the softc's hn_txlist free list.
 */
struct hn_txdesc {
	SLIST_ENTRY(hn_txdesc) link;	/* linkage on sc->hn_txlist */
	struct mbuf	*m;		/* mbuf chain freed on last put */
	struct hn_softc	*sc;		/* softc this descriptor belongs to */
	int		refs;		/* reference count, updated atomically */
	uint32_t	flags;		/* HN_TXD_FLAG_ */
	netvsc_packet	netvsc_pkt;	/* XXX to be removed */

	bus_dmamap_t	data_dmap;	/* DMA map for the mbuf data */

	/*
	 * NOTE(review): presumably the bus address, kernel address and
	 * DMA map of this descriptor's RNDIS message buffer; they are
	 * set up outside this part of the file -- confirm.
	 */
	bus_addr_t	rndis_msg_paddr;
	rndis_msg	*rndis_msg;
	bus_dmamap_t	rndis_msg_dmap;
};

/* Descriptor is currently on the free list. */
#define HN_TXD_FLAG_ONLIST	0x1
/* data_dmap is currently loaded and must be unloaded before reuse. */
#define HN_TXD_FLAG_DMAMAP	0x2
166295948Ssephe
/*
 * A unified mask of all outbound checksum/TSO offload flags.  Having a
 * single mask is convenient, and it helps to avoid unnecessary checksum
 * calculation in the network forwarding scenario.
 */
#define HV_CSUM_FOR_OUTBOUND						\
    (CSUM_IP|CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP|CSUM_IP_TSO|		\
    CSUM_IP_ISCSI|CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP|		\
    CSUM_IP6_TSO|CSUM_IP6_ISCSI)
176250199Sgrehan
/* XXX move to netinet/tcp_lro.h */
/* Largest LRO high watermark accepted, also used as the default. */
#define HN_LRO_HIWAT_MAX				65535
#define HN_LRO_HIWAT_DEF				HN_LRO_HIWAT_MAX
/* Smallest LRO high watermark accepted for the given interface. */
/* YYY 2*MTU is a bit rough, but should be good enough. */
#define HN_LRO_HIWAT_MTULIM(ifp)			(2 * (ifp)->if_mtu)
/*
 * True when hiwat lies within [HN_LRO_HIWAT_MTULIM, HN_LRO_HIWAT_MAX].
 * Both bounds must hold at the same time; joining them with a logical OR
 * would accept every value, since any number is either above the lower
 * bound or below the upper one.
 */
#define HN_LRO_HIWAT_ISVALID(sc, hiwat)			\
    ((hiwat) >= HN_LRO_HIWAT_MTULIM((sc)->hn_ifp) &&	\
     (hiwat) <= HN_LRO_HIWAT_MAX)
185250199Sgrehan
/*
 * Per-softc lock wrappers around hn_lock, a default (MTX_DEF) mutex.
 *
 * Be aware that this sleep mutex will exhibit WITNESS errors when
 * certain TCP and ARP code paths are taken.  This appears to be a
 * well-known condition, as all other drivers checked use a sleep
 * mutex to protect their transmit paths.
 * Also be aware that mutexes do not play well with semaphores, and there
 * is a conflicting semaphore in a certain channel code path.
 */
#define NV_LOCK_INIT(_sc, _name) \
	    mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF)
#define NV_LOCK(_sc)		mtx_lock(&(_sc)->hn_lock)
#define NV_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->hn_lock, MA_OWNED)
#define NV_UNLOCK(_sc)		mtx_unlock(&(_sc)->hn_lock)
#define NV_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->hn_lock)
200250199Sgrehan
201250199Sgrehan
/*
 * Globals
 */

int hv_promisc_mode = 0;    /* normal mode by default */

/*
 * Trust TCP segment verification on the host side.  This global value
 * seeds each softc's own hn_trust_hosttcp at attach time.
 */
static int hn_trust_hosttcp = 0;
TUNABLE_INT("dev.hn.trust_hosttcp", &hn_trust_hosttcp);

#if __FreeBSD_version >= 1100045
/*
 * Limit TSO burst size.  Values <= 0 or above IP_MAXPACKET are replaced
 * by IP_MAXPACKET at attach time.
 */
static int hn_tso_maxlen = 0;
TUNABLE_INT("dev.hn.tso_maxlen", &hn_tso_maxlen);
#endif

/*
 * Limit chimney send size.  Only honoured at attach time when it is
 * positive and below the device's send section size.
 */
static int hn_tx_chimney_size = 0;
TUNABLE_INT("dev.hn.tx_chimney_size", &hn_tx_chimney_size);
221295948Ssephe
/*
 * Forward declarations
 */
/* ifnet entry points and their locked helpers */
static void hn_stop(hn_softc_t *sc);
static void hn_ifinit_locked(hn_softc_t *sc);
static void hn_ifinit(void *xsc);
static int  hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
static void hn_start_locked(struct ifnet *ifp);
static void hn_start(struct ifnet *ifp);
/* ifmedia callbacks */
static int hn_ifmedia_upd(struct ifnet *ifp);
static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);
/* sysctl handlers; the LRO one exists only when HN_LRO_HIWAT is defined */
#ifdef HN_LRO_HIWAT
static int hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_check_iplen(const struct mbuf *, int);
/* TX descriptor ring setup/teardown, used by attach and detach */
static int hn_create_tx_ring(struct hn_softc *sc);
static void hn_destroy_tx_ring(struct hn_softc *sc);
240250199Sgrehan
241295789Ssephestatic __inline void
242295789Ssephehn_set_lro_hiwat(struct hn_softc *sc, int hiwat)
243295789Ssephe{
244295789Ssephe	sc->hn_lro_hiwat = hiwat;
245295789Ssephe#ifdef HN_LRO_HIWAT
246295789Ssephe	sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat;
247295789Ssephe#endif
248295789Ssephe}
249295789Ssephe
250285236Swhu/*
251285236Swhu * NetVsc get message transport protocol type
252285236Swhu */
253285236Swhustatic uint32_t get_transport_proto_type(struct mbuf *m_head)
254285236Swhu{
255285236Swhu	uint32_t ret_val = TRANSPORT_TYPE_NOT_IP;
256285236Swhu	uint16_t ether_type = 0;
257285236Swhu	int ether_len = 0;
258285236Swhu	struct ether_vlan_header *eh;
259285236Swhu#ifdef INET
260285236Swhu	struct ip *iph;
261285236Swhu#endif
262285236Swhu#ifdef INET6
263285236Swhu	struct ip6_hdr *ip6;
264285236Swhu#endif
265250199Sgrehan
266285236Swhu	eh = mtod(m_head, struct ether_vlan_header*);
267285236Swhu	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
268285236Swhu		ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
269285236Swhu		ether_type = eh->evl_proto;
270285236Swhu	} else {
271285236Swhu		ether_len = ETHER_HDR_LEN;
272285236Swhu		ether_type = eh->evl_encap_proto;
273285236Swhu	}
274285236Swhu
275285236Swhu	switch (ntohs(ether_type)) {
276285236Swhu#ifdef INET6
277285236Swhu	case ETHERTYPE_IPV6:
278285236Swhu		ip6 = (struct ip6_hdr *)(m_head->m_data + ether_len);
279285236Swhu
280285236Swhu		if (IPPROTO_TCP == ip6->ip6_nxt) {
281285236Swhu			ret_val = TRANSPORT_TYPE_IPV6_TCP;
282285236Swhu		} else if (IPPROTO_UDP == ip6->ip6_nxt) {
283285236Swhu			ret_val = TRANSPORT_TYPE_IPV6_UDP;
284285236Swhu		}
285285236Swhu		break;
286285236Swhu#endif
287285236Swhu#ifdef INET
288285236Swhu	case ETHERTYPE_IP:
289285236Swhu		iph = (struct ip *)(m_head->m_data + ether_len);
290285236Swhu
291285236Swhu		if (IPPROTO_TCP == iph->ip_p) {
292285236Swhu			ret_val = TRANSPORT_TYPE_IPV4_TCP;
293285236Swhu		} else if (IPPROTO_UDP == iph->ip_p) {
294285236Swhu			ret_val = TRANSPORT_TYPE_IPV4_UDP;
295285236Swhu		}
296285236Swhu		break;
297285236Swhu#endif
298285236Swhu	default:
299285236Swhu		ret_val = TRANSPORT_TYPE_NOT_IP;
300285236Swhu		break;
301285236Swhu	}
302285236Swhu
303285236Swhu	return (ret_val);
304285236Swhu}
305285236Swhu
306250199Sgrehanstatic int
307295789Ssephehn_ifmedia_upd(struct ifnet *ifp __unused)
308250199Sgrehan{
309295789Ssephe
310295789Ssephe	return EOPNOTSUPP;
311250199Sgrehan}
312250199Sgrehan
313250199Sgrehanstatic void
314295789Ssephehn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
315250199Sgrehan{
316295789Ssephe	struct hn_softc *sc = ifp->if_softc;
317250199Sgrehan
318295789Ssephe	ifmr->ifm_status = IFM_AVALID;
319295789Ssephe	ifmr->ifm_active = IFM_ETHER;
320295789Ssephe
321295789Ssephe	if (!sc->hn_carrier) {
322295789Ssephe		ifmr->ifm_active |= IFM_NONE;
323295789Ssephe		return;
324295789Ssephe	}
325295789Ssephe	ifmr->ifm_status |= IFM_ACTIVE;
326295789Ssephe	ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
327250199Sgrehan}
328250199Sgrehan
/*
 * Device type GUID matched by netvsc_probe().
 * {F8615163-DF3E-46c5-913F-F2D2F965ED0E}
 * The first three GUID fields are stored byte-swapped (little-endian),
 * the remaining eight bytes in the order written above.
 */
static const hv_guid g_net_vsc_device_type = {
	.data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
		0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
};
334250199Sgrehan
335250199Sgrehan/*
336250199Sgrehan * Standard probe entry point.
337250199Sgrehan *
338250199Sgrehan */
339250199Sgrehanstatic int
340250199Sgrehannetvsc_probe(device_t dev)
341250199Sgrehan{
342250199Sgrehan	const char *p;
343250199Sgrehan
344250199Sgrehan	p = vmbus_get_type(dev);
345250199Sgrehan	if (!memcmp(p, &g_net_vsc_device_type.data, sizeof(hv_guid))) {
346250199Sgrehan		device_set_desc(dev, "Synthetic Network Interface");
347263853Sdelphij		if (bootverbose)
348263853Sdelphij			printf("Netvsc probe... DONE \n");
349250199Sgrehan
350273580Sdelphij		return (BUS_PROBE_DEFAULT);
351250199Sgrehan	}
352250199Sgrehan
353250199Sgrehan	return (ENXIO);
354250199Sgrehan}
355250199Sgrehan
356250199Sgrehan/*
357250199Sgrehan * Standard attach entry point.
358250199Sgrehan *
359250199Sgrehan * Called when the driver is loaded.  It allocates needed resources,
360250199Sgrehan * and initializes the "hardware" and software.
361250199Sgrehan */
362250199Sgrehanstatic int
363250199Sgrehannetvsc_attach(device_t dev)
364250199Sgrehan{
365250199Sgrehan	struct hv_device *device_ctx = vmbus_get_devctx(dev);
366250199Sgrehan	netvsc_device_info device_info;
367250199Sgrehan	hn_softc_t *sc;
368250199Sgrehan	int unit = device_get_unit(dev);
369295948Ssephe	struct ifnet *ifp = NULL;
370295789Ssephe	struct sysctl_oid_list *child;
371295789Ssephe	struct sysctl_ctx_list *ctx;
372295948Ssephe	int error;
373295948Ssephe#if __FreeBSD_version >= 1100045
374295948Ssephe	int tso_maxlen;
375295948Ssephe#endif
376250199Sgrehan
377250199Sgrehan	sc = device_get_softc(dev);
378250199Sgrehan	if (sc == NULL) {
379250199Sgrehan		return (ENOMEM);
380250199Sgrehan	}
381250199Sgrehan
382250199Sgrehan	bzero(sc, sizeof(hn_softc_t));
383250199Sgrehan	sc->hn_unit = unit;
384250199Sgrehan	sc->hn_dev = dev;
385295789Ssephe	sc->hn_lro_hiwat = HN_LRO_HIWAT_DEF;
386295789Ssephe	sc->hn_trust_hosttcp = hn_trust_hosttcp;
387250199Sgrehan
388295948Ssephe	error = hn_create_tx_ring(sc);
389295948Ssephe	if (error)
390295948Ssephe		goto failed;
391295948Ssephe
392250199Sgrehan	NV_LOCK_INIT(sc, "NetVSCLock");
393250199Sgrehan
394250199Sgrehan	sc->hn_dev_obj = device_ctx;
395250199Sgrehan
396250199Sgrehan	ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER);
397250199Sgrehan	ifp->if_softc = sc;
398250199Sgrehan
399250199Sgrehan	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
400250199Sgrehan	ifp->if_dunit = unit;
401250199Sgrehan	ifp->if_dname = NETVSC_DEVNAME;
402250199Sgrehan
403250199Sgrehan	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
404250199Sgrehan	ifp->if_ioctl = hn_ioctl;
405250199Sgrehan	ifp->if_start = hn_start;
406250199Sgrehan	ifp->if_init = hn_ifinit;
407250199Sgrehan	/* needed by hv_rf_on_device_add() code */
408250199Sgrehan	ifp->if_mtu = ETHERMTU;
409250199Sgrehan	IFQ_SET_MAXLEN(&ifp->if_snd, 512);
410250199Sgrehan	ifp->if_snd.ifq_drv_maxlen = 511;
411250199Sgrehan	IFQ_SET_READY(&ifp->if_snd);
412250199Sgrehan
413295789Ssephe	ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);
414295789Ssephe	ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
415295789Ssephe	ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
416295789Ssephe	/* XXX ifmedia_set really should do this for us */
417295789Ssephe	sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;
418295789Ssephe
419250199Sgrehan	/*
420250199Sgrehan	 * Tell upper layers that we support full VLAN capability.
421250199Sgrehan	 */
422250199Sgrehan	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
423285236Swhu	ifp->if_capabilities |=
424295789Ssephe	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO |
425295789Ssephe	    IFCAP_LRO;
426285236Swhu	ifp->if_capenable |=
427295789Ssephe	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO |
428295789Ssephe	    IFCAP_LRO;
429285928Swhu	/*
430285928Swhu	 * Only enable UDP checksum offloading when it is on 2012R2 or
431285928Swhu	 * later. UDP checksum offloading doesn't work on earlier
432285928Swhu	 * Windows releases.
433285928Swhu	 */
434285928Swhu	if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1)
435285928Swhu		ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
436285928Swhu	else
437285928Swhu		ifp->if_hwassist = CSUM_TCP | CSUM_TSO;
438250199Sgrehan
439295948Ssephe	error = hv_rf_on_device_add(device_ctx, &device_info);
440295948Ssephe	if (error)
441295948Ssephe		goto failed;
442250199Sgrehan
443250199Sgrehan	if (device_info.link_state == 0) {
444250199Sgrehan		sc->hn_carrier = 1;
445250199Sgrehan	}
446250199Sgrehan
447295789Ssephe#if defined(INET) || defined(INET6)
448295789Ssephe	tcp_lro_init(&sc->hn_lro);
449295789Ssephe	/* Driver private LRO settings */
450295789Ssephe	sc->hn_lro.ifp = ifp;
451295789Ssephe#ifdef HN_LRO_HIWAT
452295789Ssephe	sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat;
453295789Ssephe#endif
454295789Ssephe#endif	/* INET || INET6 */
455295789Ssephe
456295948Ssephe#if __FreeBSD_version >= 1100045
457295948Ssephe	tso_maxlen = hn_tso_maxlen;
458295948Ssephe	if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET)
459295948Ssephe		tso_maxlen = IP_MAXPACKET;
460295948Ssephe
461295948Ssephe	ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
462295948Ssephe	ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
463295948Ssephe	ifp->if_hw_tsomax = tso_maxlen -
464295948Ssephe	    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
465295948Ssephe#endif
466295948Ssephe
467250199Sgrehan	ether_ifattach(ifp, device_info.mac_addr);
468250199Sgrehan
469295948Ssephe#if __FreeBSD_version >= 1100045
470295948Ssephe	if_printf(ifp, "TSO: %u/%u/%u\n", ifp->if_hw_tsomax,
471295948Ssephe	    ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);
472295948Ssephe#endif
473295948Ssephe
474295948Ssephe	sc->hn_tx_chimney_max = sc->net_dev->send_section_size;
475295948Ssephe	sc->hn_tx_chimney_size = sc->hn_tx_chimney_max;
476295948Ssephe	if (hn_tx_chimney_size > 0 &&
477295948Ssephe	    hn_tx_chimney_size < sc->hn_tx_chimney_max)
478295948Ssephe		sc->hn_tx_chimney_size = hn_tx_chimney_size;
479295948Ssephe
480295789Ssephe	ctx = device_get_sysctl_ctx(dev);
481295789Ssephe	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
482295789Ssephe
483295789Ssephe	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_queued",
484295789Ssephe	    CTLFLAG_RW, &sc->hn_lro.lro_queued, 0, "LRO queued");
485295789Ssephe	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_flushed",
486295789Ssephe	    CTLFLAG_RW, &sc->hn_lro.lro_flushed, 0, "LRO flushed");
487295789Ssephe	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "lro_tried",
488295789Ssephe	    CTLFLAG_RW, &sc->hn_lro_tried, "# of LRO tries");
489295789Ssephe#ifdef HN_LRO_HIWAT
490295789Ssephe	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_hiwat",
491295789Ssephe	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_hiwat_sysctl,
492295789Ssephe	    "I", "LRO high watermark");
493295789Ssephe#endif
494295789Ssephe	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "trust_hosttcp",
495295789Ssephe	    CTLFLAG_RW, &sc->hn_trust_hosttcp, 0,
496295789Ssephe	    "Trust tcp segement verification on host side, "
497295789Ssephe	    "when csum info is missing");
498295789Ssephe	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_ip",
499295789Ssephe	    CTLFLAG_RW, &sc->hn_csum_ip, "RXCSUM IP");
500295789Ssephe	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_tcp",
501295789Ssephe	    CTLFLAG_RW, &sc->hn_csum_tcp, "RXCSUM TCP");
502295789Ssephe	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_trusted",
503295789Ssephe	    CTLFLAG_RW, &sc->hn_csum_trusted,
504295789Ssephe	    "# of TCP segements that we trust host's csum verification");
505295789Ssephe	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "small_pkts",
506295789Ssephe	    CTLFLAG_RW, &sc->hn_small_pkts, "# of small packets received");
507295948Ssephe	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_txdescs",
508295948Ssephe	    CTLFLAG_RW, &sc->hn_no_txdescs, "# of times short of TX descs");
509295948Ssephe	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "send_failed",
510295948Ssephe	    CTLFLAG_RW, &sc->hn_send_failed, "# of hyper-v sending failure");
511295948Ssephe	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "txdma_failed",
512295948Ssephe	    CTLFLAG_RW, &sc->hn_txdma_failed, "# of TX DMA failure");
513295948Ssephe	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_collapsed",
514295948Ssephe	    CTLFLAG_RW, &sc->hn_tx_collapsed, "# of TX mbuf collapsed");
515295948Ssephe	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_chimney",
516295948Ssephe	    CTLFLAG_RW, &sc->hn_tx_chimney, "# of chimney send");
517295948Ssephe	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
518295948Ssephe	    CTLFLAG_RD, &sc->hn_txdesc_cnt, 0, "# of total TX descs");
519295948Ssephe	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
520295948Ssephe	    CTLFLAG_RD, &sc->hn_txdesc_avail, 0, "# of available TX descs");
521295948Ssephe	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
522295948Ssephe	    CTLFLAG_RD, &sc->hn_tx_chimney_max, 0,
523295948Ssephe	    "Chimney send packet size upper boundary");
524295948Ssephe	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
525295948Ssephe	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_tx_chimney_size_sysctl,
526295948Ssephe	    "I", "Chimney send packet size limit");
527295789Ssephe
528295789Ssephe	if (unit == 0) {
529295789Ssephe		struct sysctl_ctx_list *dc_ctx;
530295789Ssephe		struct sysctl_oid_list *dc_child;
531295789Ssephe		devclass_t dc;
532295789Ssephe
533295789Ssephe		/*
534295789Ssephe		 * Add sysctl nodes for devclass
535295789Ssephe		 */
536295789Ssephe		dc = device_get_devclass(dev);
537295789Ssephe		dc_ctx = devclass_get_sysctl_ctx(dc);
538295789Ssephe		dc_child = SYSCTL_CHILDREN(devclass_get_sysctl_tree(dc));
539295789Ssephe
540295789Ssephe		SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "trust_hosttcp",
541295789Ssephe		    CTLFLAG_RD, &hn_trust_hosttcp, 0,
542295789Ssephe		    "Trust tcp segement verification on host side, "
543295789Ssephe		    "when csum info is missing (global setting)");
544295948Ssephe		SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "tx_chimney_size",
545295948Ssephe		    CTLFLAG_RD, &hn_tx_chimney_size, 0,
546295948Ssephe		    "Chimney send packet size limit");
547295948Ssephe#if __FreeBSD_version >= 1100045
548295948Ssephe		SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "tso_maxlen",
549295948Ssephe		    CTLFLAG_RD, &hn_tso_maxlen, 0, "TSO burst limit");
550295948Ssephe#endif
551295789Ssephe	}
552295789Ssephe
553250199Sgrehan	return (0);
554295948Ssephefailed:
555295948Ssephe	hn_destroy_tx_ring(sc);
556295948Ssephe	if (ifp != NULL)
557295948Ssephe		if_free(ifp);
558295948Ssephe	return (error);
559250199Sgrehan}
560250199Sgrehan
561250199Sgrehan/*
562250199Sgrehan * Standard detach entry point
563250199Sgrehan */
564250199Sgrehanstatic int
565250199Sgrehannetvsc_detach(device_t dev)
566250199Sgrehan{
567295789Ssephe	struct hn_softc *sc = device_get_softc(dev);
568250199Sgrehan	struct hv_device *hv_device = vmbus_get_devctx(dev);
569250199Sgrehan
570263853Sdelphij	if (bootverbose)
571263853Sdelphij		printf("netvsc_detach\n");
572250199Sgrehan
573250199Sgrehan	/*
574250199Sgrehan	 * XXXKYS:  Need to clean up all our
575250199Sgrehan	 * driver state; this is the driver
576250199Sgrehan	 * unloading.
577250199Sgrehan	 */
578250199Sgrehan
579250199Sgrehan	/*
580250199Sgrehan	 * XXXKYS:  Need to stop outgoing traffic and unregister
581250199Sgrehan	 * the netdevice.
582250199Sgrehan	 */
583250199Sgrehan
584250199Sgrehan	hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL);
585250199Sgrehan
586295789Ssephe	ifmedia_removeall(&sc->hn_media);
587295789Ssephe#if defined(INET) || defined(INET6)
588295789Ssephe	tcp_lro_free(&sc->hn_lro);
589295789Ssephe#endif
590295948Ssephe	hn_destroy_tx_ring(sc);
591295789Ssephe
592250199Sgrehan	return (0);
593250199Sgrehan}
594250199Sgrehan
/*
 * Standard shutdown entry point.
 *
 * Nothing is done at shutdown time; always returns 0.
 */
static int
netvsc_shutdown(device_t dev)
{
	return (0);
}
603250199Sgrehan
604295948Ssephestatic __inline int
605295948Ssephehn_txdesc_dmamap_load(struct hn_softc *sc, struct hn_txdesc *txd,
606295948Ssephe    struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
607295948Ssephe{
608295948Ssephe	struct mbuf *m = *m_head;
609295948Ssephe	int error;
610295948Ssephe
611295948Ssephe	error = bus_dmamap_load_mbuf_sg(sc->hn_tx_data_dtag, txd->data_dmap,
612295948Ssephe	    m, segs, nsegs, BUS_DMA_NOWAIT);
613295948Ssephe	if (error == EFBIG) {
614295948Ssephe		struct mbuf *m_new;
615295948Ssephe
616295948Ssephe		m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
617295948Ssephe		if (m_new == NULL)
618295948Ssephe			return ENOBUFS;
619295948Ssephe		else
620295948Ssephe			*m_head = m = m_new;
621295948Ssephe		sc->hn_tx_collapsed++;
622295948Ssephe
623295948Ssephe		error = bus_dmamap_load_mbuf_sg(sc->hn_tx_data_dtag,
624295948Ssephe		    txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
625295948Ssephe	}
626295948Ssephe	if (!error) {
627295948Ssephe		bus_dmamap_sync(sc->hn_tx_data_dtag, txd->data_dmap,
628295948Ssephe		    BUS_DMASYNC_PREWRITE);
629295948Ssephe		txd->flags |= HN_TXD_FLAG_DMAMAP;
630295948Ssephe	}
631295948Ssephe	return error;
632295948Ssephe}
633295948Ssephe
634295948Ssephestatic __inline void
635295948Ssephehn_txdesc_dmamap_unload(struct hn_softc *sc, struct hn_txdesc *txd)
636295948Ssephe{
637295948Ssephe
638295948Ssephe	if (txd->flags & HN_TXD_FLAG_DMAMAP) {
639295948Ssephe		bus_dmamap_sync(sc->hn_tx_data_dtag,
640295948Ssephe		    txd->data_dmap, BUS_DMASYNC_POSTWRITE);
641295948Ssephe		bus_dmamap_unload(sc->hn_tx_data_dtag,
642295948Ssephe		    txd->data_dmap);
643295948Ssephe		txd->flags &= ~HN_TXD_FLAG_DMAMAP;
644295948Ssephe	}
645295948Ssephe}
646295948Ssephe
647295948Ssephestatic __inline int
648295948Ssephehn_txdesc_put(struct hn_softc *sc, struct hn_txdesc *txd)
649295948Ssephe{
650295948Ssephe
651295948Ssephe	KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
652295948Ssephe	    ("put an onlist txd %#x", txd->flags));
653295948Ssephe
654295948Ssephe	KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
655295948Ssephe	if (atomic_fetchadd_int(&txd->refs, -1) != 1)
656295948Ssephe		return 0;
657295948Ssephe
658295948Ssephe	hn_txdesc_dmamap_unload(sc, txd);
659295948Ssephe	if (txd->m != NULL) {
660295948Ssephe		m_freem(txd->m);
661295948Ssephe		txd->m = NULL;
662295948Ssephe	}
663295948Ssephe
664295948Ssephe	txd->flags |= HN_TXD_FLAG_ONLIST;
665295948Ssephe
666295948Ssephe	mtx_lock_spin(&sc->hn_txlist_spin);
667295948Ssephe	KASSERT(sc->hn_txdesc_avail >= 0 &&
668295948Ssephe	    sc->hn_txdesc_avail < sc->hn_txdesc_cnt,
669295948Ssephe	    ("txdesc_put: invalid txd avail %d", sc->hn_txdesc_avail));
670295948Ssephe	sc->hn_txdesc_avail++;
671295948Ssephe	SLIST_INSERT_HEAD(&sc->hn_txlist, txd, link);
672295948Ssephe	mtx_unlock_spin(&sc->hn_txlist_spin);
673295948Ssephe
674295948Ssephe	return 1;
675295948Ssephe}
676295948Ssephe
677295948Ssephestatic __inline struct hn_txdesc *
678295948Ssephehn_txdesc_get(struct hn_softc *sc)
679295948Ssephe{
680295948Ssephe	struct hn_txdesc *txd;
681295948Ssephe
682295948Ssephe	mtx_lock_spin(&sc->hn_txlist_spin);
683295948Ssephe	txd = SLIST_FIRST(&sc->hn_txlist);
684295948Ssephe	if (txd != NULL) {
685295948Ssephe		KASSERT(sc->hn_txdesc_avail > 0,
686295948Ssephe		    ("txdesc_get: invalid txd avail %d", sc->hn_txdesc_avail));
687295948Ssephe		sc->hn_txdesc_avail--;
688295948Ssephe		SLIST_REMOVE_HEAD(&sc->hn_txlist, link);
689295948Ssephe	}
690295948Ssephe	mtx_unlock_spin(&sc->hn_txlist_spin);
691295948Ssephe
692295948Ssephe	if (txd != NULL) {
693295948Ssephe		KASSERT(txd->m == NULL && txd->refs == 0 &&
694295948Ssephe		    (txd->flags & HN_TXD_FLAG_ONLIST), ("invalid txd"));
695295948Ssephe		txd->flags &= ~HN_TXD_FLAG_ONLIST;
696295948Ssephe		txd->refs = 1;
697295948Ssephe	}
698295948Ssephe	return txd;
699295948Ssephe}
700295948Ssephe
/*
 * Take an additional reference on txd.  The caller must already hold a
 * reference; the matching release is hn_txdesc_put().
 */
static __inline void
hn_txdesc_hold(struct hn_txdesc *txd)
{

	/* 0->1 transition will never work */
	KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs));
	atomic_add_int(&txd->refs, 1);
}
709295948Ssephe
710250199Sgrehan/*
711250199Sgrehan * Send completion processing
712250199Sgrehan *
713250199Sgrehan * Note:  It looks like offset 0 of buf is reserved to hold the softc
714250199Sgrehan * pointer.  The sc pointer is not currently needed in this function, and
715250199Sgrehan * it is not presently populated by the TX function.
716250199Sgrehan */
717250199Sgrehanvoid
718250199Sgrehannetvsc_xmit_completion(void *context)
719250199Sgrehan{
720295948Ssephe	netvsc_packet *packet = context;
721295948Ssephe	struct hn_txdesc *txd;
722295948Ssephe	struct hn_softc *sc;
723250199Sgrehan
724295948Ssephe	txd = (struct hn_txdesc *)(uintptr_t)
725295948Ssephe	    packet->compl.send.send_completion_tid;
726250199Sgrehan
727295948Ssephe	sc = txd->sc;
728295948Ssephe	sc->hn_txeof = 1;
729295948Ssephe	hn_txdesc_put(sc, txd);
730295948Ssephe}
731250199Sgrehan
732295948Ssephevoid
733295948Ssephenetvsc_channel_rollup(struct hv_device *device_ctx)
734295948Ssephe{
735295948Ssephe	struct hn_softc *sc = device_get_softc(device_ctx->device);
736295948Ssephe	struct ifnet *ifp;
737295948Ssephe
738295948Ssephe	if (!sc->hn_txeof)
739295948Ssephe		return;
740295948Ssephe
741295948Ssephe	sc->hn_txeof = 0;
742295948Ssephe	ifp = sc->hn_ifp;
743295948Ssephe	NV_LOCK(sc);
744295948Ssephe	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
745295948Ssephe	hn_start_locked(ifp);
746295948Ssephe	NV_UNLOCK(sc);
747250199Sgrehan}
748250199Sgrehan
749250199Sgrehan/*
750250199Sgrehan * Start a transmit of one or more packets
751250199Sgrehan */
752295948Ssephestatic void
753250199Sgrehanhn_start_locked(struct ifnet *ifp)
754250199Sgrehan{
755250199Sgrehan	hn_softc_t *sc = ifp->if_softc;
756250199Sgrehan	struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
757285236Swhu	netvsc_dev *net_dev = sc->net_dev;
758250199Sgrehan	netvsc_packet *packet;
759250199Sgrehan	struct mbuf *m_head, *m;
760285236Swhu	struct ether_vlan_header *eh;
761285236Swhu	rndis_msg *rndis_mesg;
762285236Swhu	rndis_packet *rndis_pkt;
763285236Swhu	rndis_per_packet_info *rppi;
764285236Swhu	ndis_8021q_info *rppi_vlan_info;
765285236Swhu	rndis_tcp_ip_csum_info *csum_info;
766285236Swhu	rndis_tcp_tso_info *tso_info;
767285236Swhu	int ether_len;
768285236Swhu	uint32_t rndis_msg_size = 0;
769285236Swhu	uint32_t trans_proto_type;
770285236Swhu	uint32_t send_buf_section_idx =
771285236Swhu	    NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
772250199Sgrehan
773295948Ssephe	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
774295948Ssephe	    IFF_DRV_RUNNING)
775295948Ssephe		return;
776250199Sgrehan
777295948Ssephe	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
778295948Ssephe		bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
779295948Ssephe		int error, nsegs, i, send_failed = 0;
780295948Ssephe		struct hn_txdesc *txd;
781250199Sgrehan
782295948Ssephe		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
783295948Ssephe		if (m_head == NULL)
784295948Ssephe			break;
785250199Sgrehan
786295948Ssephe		txd = hn_txdesc_get(sc);
787295948Ssephe		if (txd == NULL) {
788295948Ssephe			sc->hn_no_txdescs++;
789295948Ssephe			IF_PREPEND(&ifp->if_snd, m_head);
790295948Ssephe			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
791295948Ssephe			break;
792250199Sgrehan		}
793250199Sgrehan
794295948Ssephe		packet = &txd->netvsc_pkt;
795295948Ssephe		/* XXX not necessary */
796295948Ssephe		memset(packet, 0, sizeof(*packet));
797250199Sgrehan
798285236Swhu		packet->is_data_pkt = TRUE;
799285236Swhu
800285236Swhu		/* Initialize it from the mbuf */
801295948Ssephe		packet->tot_data_buf_len = m_head->m_pkthdr.len;
802285236Swhu
803250199Sgrehan		/*
804250199Sgrehan		 * extension points to the area reserved for the
805250199Sgrehan		 * rndis_filter_packet, which is placed just after
806250199Sgrehan		 * the netvsc_packet (and rppi struct, if present;
807250199Sgrehan		 * length is updated later).
808250199Sgrehan		 */
809295948Ssephe		rndis_mesg = txd->rndis_msg;
810295948Ssephe		/* XXX not necessary */
811295948Ssephe		memset(rndis_mesg, 0, HN_RNDIS_MSG_LEN);
812285236Swhu		rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG;
813250199Sgrehan
814285236Swhu		rndis_pkt = &rndis_mesg->msg.packet;
815285236Swhu		rndis_pkt->data_offset = sizeof(rndis_packet);
816285236Swhu		rndis_pkt->data_length = packet->tot_data_buf_len;
817285236Swhu		rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet);
818250199Sgrehan
819285236Swhu		rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet);
820250199Sgrehan
821250199Sgrehan		/*
822250199Sgrehan		 * If the Hyper-V infrastructure needs to embed a VLAN tag,
823250199Sgrehan		 * initialize netvsc_packet and rppi struct values as needed.
824250199Sgrehan		 */
825285236Swhu		if (m_head->m_flags & M_VLANTAG) {
826285236Swhu			/*
827285236Swhu			 * set up some additional fields so the Hyper-V infrastructure will stuff the VLAN tag
828285236Swhu			 * into the frame.
829285236Swhu			 */
830285236Swhu			rndis_msg_size += RNDIS_VLAN_PPI_SIZE;
831285236Swhu
832285236Swhu			rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE,
833285236Swhu			    ieee_8021q_info);
834285236Swhu
835285236Swhu			/* VLAN info immediately follows rppi struct */
836285236Swhu			rppi_vlan_info = (ndis_8021q_info *)((char*)rppi +
837285236Swhu			    rppi->per_packet_info_offset);
838285236Swhu			/* FreeBSD does not support CFI or priority */
839285236Swhu			rppi_vlan_info->u1.s1.vlan_id =
840295948Ssephe			    m_head->m_pkthdr.ether_vtag & 0xfff;
841250199Sgrehan		}
842250199Sgrehan
843292439Sroyger		/* Only check the flags for outbound and ignore the ones for inbound */
844292439Sroyger		if (0 == (m_head->m_pkthdr.csum_flags & HV_CSUM_FOR_OUTBOUND)) {
845285236Swhu			goto pre_send;
846285236Swhu		}
847285236Swhu
848285236Swhu		eh = mtod(m_head, struct ether_vlan_header*);
849285236Swhu		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
850285236Swhu			ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
851285236Swhu		} else {
852285236Swhu			ether_len = ETHER_HDR_LEN;
853285236Swhu		}
854285236Swhu
855285236Swhu		trans_proto_type = get_transport_proto_type(m_head);
856285236Swhu		if (TRANSPORT_TYPE_NOT_IP == trans_proto_type) {
857285236Swhu			goto pre_send;
858285236Swhu		}
859285236Swhu
860250199Sgrehan		/*
861285236Swhu		 * TSO packet needless to setup the send side checksum
862285236Swhu		 * offload.
863285236Swhu		 */
864285236Swhu		if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
865285236Swhu			goto do_tso;
866285236Swhu		}
867285236Swhu
868285236Swhu		/* setup checksum offload */
869285236Swhu		rndis_msg_size += RNDIS_CSUM_PPI_SIZE;
870285236Swhu		rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE,
871285236Swhu		    tcpip_chksum_info);
872285236Swhu		csum_info = (rndis_tcp_ip_csum_info *)((char*)rppi +
873285236Swhu		    rppi->per_packet_info_offset);
874285236Swhu
875285236Swhu		if (trans_proto_type & (TYPE_IPV4 << 16)) {
876285236Swhu			csum_info->xmit.is_ipv4 = 1;
877285236Swhu		} else {
878285236Swhu			csum_info->xmit.is_ipv6 = 1;
879285236Swhu		}
880285236Swhu
881285236Swhu		if (trans_proto_type & TYPE_TCP) {
882285236Swhu			csum_info->xmit.tcp_csum = 1;
883285236Swhu			csum_info->xmit.tcp_header_offset = 0;
884285236Swhu		} else if (trans_proto_type & TYPE_UDP) {
885285236Swhu			csum_info->xmit.udp_csum = 1;
886285236Swhu		}
887285236Swhu
888285236Swhu		goto pre_send;
889285236Swhu
890285236Swhudo_tso:
891285236Swhu		/* setup TCP segmentation offload */
892285236Swhu		rndis_msg_size += RNDIS_TSO_PPI_SIZE;
893285236Swhu		rppi = hv_set_rppi_data(rndis_mesg, RNDIS_TSO_PPI_SIZE,
894285236Swhu		    tcp_large_send_info);
895285236Swhu
896285236Swhu		tso_info = (rndis_tcp_tso_info *)((char *)rppi +
897285236Swhu		    rppi->per_packet_info_offset);
898285236Swhu		tso_info->lso_v2_xmit.type =
899285236Swhu		    RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
900285236Swhu
901285236Swhu#ifdef INET
902285236Swhu		if (trans_proto_type & (TYPE_IPV4 << 16)) {
903285236Swhu			struct ip *ip =
904285236Swhu			    (struct ip *)(m_head->m_data + ether_len);
905285236Swhu			unsigned long iph_len = ip->ip_hl << 2;
906285236Swhu			struct tcphdr *th =
907285236Swhu			    (struct tcphdr *)((caddr_t)ip + iph_len);
908285236Swhu
909285236Swhu			tso_info->lso_v2_xmit.ip_version =
910285236Swhu			    RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
911285236Swhu			ip->ip_len = 0;
912285236Swhu			ip->ip_sum = 0;
913285236Swhu
914285236Swhu			th->th_sum = in_pseudo(ip->ip_src.s_addr,
915285236Swhu			    ip->ip_dst.s_addr,
916285236Swhu			    htons(IPPROTO_TCP));
917285236Swhu		}
918285236Swhu#endif
919285236Swhu#if defined(INET6) && defined(INET)
920285236Swhu		else
921285236Swhu#endif
922285236Swhu#ifdef INET6
923285236Swhu		{
924285236Swhu			struct ip6_hdr *ip6 =
925285236Swhu			    (struct ip6_hdr *)(m_head->m_data + ether_len);
926285236Swhu			struct tcphdr *th = (struct tcphdr *)(ip6 + 1);
927285236Swhu
928285236Swhu			tso_info->lso_v2_xmit.ip_version =
929285236Swhu			    RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
930285236Swhu			ip6->ip6_plen = 0;
931285236Swhu			th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
932285236Swhu		}
933285236Swhu#endif
934285236Swhu		tso_info->lso_v2_xmit.tcp_header_offset = 0;
935285236Swhu		tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz;
936285236Swhu
937285236Swhupre_send:
938285236Swhu		rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size;
939285236Swhu		packet->tot_data_buf_len = rndis_mesg->msg_len;
940285236Swhu
941285236Swhu		/* send packet with send buffer */
942295948Ssephe		if (packet->tot_data_buf_len < sc->hn_tx_chimney_size) {
943285236Swhu			send_buf_section_idx =
944285236Swhu			    hv_nv_get_next_send_section(net_dev);
945285236Swhu			if (send_buf_section_idx !=
946285236Swhu			    NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) {
947285236Swhu				char *dest = ((char *)net_dev->send_buf +
948285236Swhu				    send_buf_section_idx *
949285236Swhu				    net_dev->send_section_size);
950285236Swhu
951285236Swhu				memcpy(dest, rndis_mesg, rndis_msg_size);
952285236Swhu				dest += rndis_msg_size;
953285236Swhu				for (m = m_head; m != NULL; m = m->m_next) {
954285236Swhu					if (m->m_len) {
955285236Swhu						memcpy(dest,
956285236Swhu						    (void *)mtod(m, vm_offset_t),
957285236Swhu						    m->m_len);
958285236Swhu						dest += m->m_len;
959285236Swhu					}
960285236Swhu				}
961285236Swhu
962285236Swhu				packet->send_buf_section_idx =
963285236Swhu				    send_buf_section_idx;
964285236Swhu				packet->send_buf_section_size =
965285236Swhu				    packet->tot_data_buf_len;
966285236Swhu				packet->page_buf_count = 0;
967295948Ssephe				sc->hn_tx_chimney++;
968285236Swhu				goto do_send;
969285236Swhu			}
970285236Swhu		}
971285236Swhu
972295948Ssephe		error = hn_txdesc_dmamap_load(sc, txd, &m_head, segs, &nsegs);
973295948Ssephe		if (error) {
974295948Ssephe			int freed;
975295948Ssephe
976295948Ssephe			/*
977295948Ssephe			 * This mbuf is not linked w/ the txd yet, so free
978295948Ssephe			 * it now.
979295948Ssephe			 */
980295948Ssephe			m_freem(m_head);
981295948Ssephe			freed = hn_txdesc_put(sc, txd);
982295948Ssephe			KASSERT(freed != 0,
983295948Ssephe			    ("fail to free txd upon txdma error"));
984295948Ssephe
985295948Ssephe			sc->hn_txdma_failed++;
986295948Ssephe			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
987295948Ssephe			continue;
988295948Ssephe		}
989295948Ssephe
990295948Ssephe		packet->page_buf_count = nsegs +
991295948Ssephe		    HV_RF_NUM_TX_RESERVED_PAGE_BUFS;
992295948Ssephe
993285236Swhu		/* send packet with page buffer */
994295948Ssephe		packet->page_buffers[0].pfn = atop(txd->rndis_msg_paddr);
995285236Swhu		packet->page_buffers[0].offset =
996295948Ssephe		    txd->rndis_msg_paddr & PAGE_MASK;
997285236Swhu		packet->page_buffers[0].length = rndis_msg_size;
998285236Swhu
999285236Swhu		/*
1000250199Sgrehan		 * Fill the page buffers with mbuf info starting at index
1001250199Sgrehan		 * HV_RF_NUM_TX_RESERVED_PAGE_BUFS.
1002250199Sgrehan		 */
1003295948Ssephe		for (i = 0; i < nsegs; ++i) {
1004295948Ssephe			hv_vmbus_page_buffer *pb = &packet->page_buffers[
1005295948Ssephe			    i + HV_RF_NUM_TX_RESERVED_PAGE_BUFS];
1006295948Ssephe
1007295948Ssephe			pb->pfn = atop(segs[i].ds_addr);
1008295948Ssephe			pb->offset = segs[i].ds_addr & PAGE_MASK;
1009295948Ssephe			pb->length = segs[i].ds_len;
1010250199Sgrehan		}
1011250199Sgrehan
1012285236Swhu		packet->send_buf_section_idx =
1013285236Swhu		    NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
1014285236Swhu		packet->send_buf_section_size = 0;
1015285236Swhu
1016285236Swhudo_send:
1017295948Ssephe		txd->m = m_head;
1018285236Swhu
1019250199Sgrehan		/* Set the completion routine */
1020250199Sgrehan		packet->compl.send.on_send_completion = netvsc_xmit_completion;
1021250199Sgrehan		packet->compl.send.send_completion_context = packet;
1022295948Ssephe		packet->compl.send.send_completion_tid =
1023295948Ssephe		    (uint64_t)(uintptr_t)txd;
1024250199Sgrehan
1025295948Ssepheagain:
1026295948Ssephe		/*
1027295948Ssephe		 * Make sure that txd is not freed before ETHER_BPF_MTAP.
1028295948Ssephe		 */
1029295948Ssephe		hn_txdesc_hold(txd);
1030295948Ssephe		error = hv_nv_on_send(device_ctx, packet);
1031295948Ssephe		if (!error) {
1032295948Ssephe			ETHER_BPF_MTAP(ifp, m_head);
1033295948Ssephe			if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
1034295948Ssephe		}
1035295948Ssephe		hn_txdesc_put(sc, txd);
1036250199Sgrehan
1037295948Ssephe		if (__predict_false(error)) {
1038295948Ssephe			int freed;
1039250199Sgrehan
1040250199Sgrehan			/*
1041295948Ssephe			 * This should "really rarely" happen.
1042295948Ssephe			 *
1043295948Ssephe			 * XXX Too many RX to be acked or too many sideband
1044295948Ssephe			 * commands to run?  Ask netvsc_channel_rollup()
1045295948Ssephe			 * to kick start later.
1046250199Sgrehan			 */
1047295948Ssephe			sc->hn_txeof = 1;
1048295948Ssephe			if (!send_failed) {
1049295948Ssephe				sc->hn_send_failed++;
1050295948Ssephe				send_failed = 1;
1051295948Ssephe				/*
1052295948Ssephe				 * Try sending again after set hn_txeof;
1053295948Ssephe				 * in case that we missed the last
1054295948Ssephe				 * netvsc_channel_rollup().
1055295948Ssephe				 */
1056295948Ssephe				goto again;
1057295948Ssephe			}
1058295948Ssephe			if_printf(ifp, "send failed\n");
1059250199Sgrehan
1060250199Sgrehan			/*
1061295948Ssephe			 * This mbuf will be prepended, don't free it
1062295948Ssephe			 * in hn_txdesc_put(); only unload it from the
1063295948Ssephe			 * DMA map in hn_txdesc_put(), if it was loaded.
1064250199Sgrehan			 */
1065295948Ssephe			txd->m = NULL;
1066295948Ssephe			freed = hn_txdesc_put(sc, txd);
1067295948Ssephe			KASSERT(freed != 0,
1068295948Ssephe			    ("fail to free txd upon send error"));
1069250199Sgrehan
1070295948Ssephe			sc->hn_send_failed++;
1071295948Ssephe			IF_PREPEND(&ifp->if_snd, m_head);
1072295948Ssephe			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1073295948Ssephe			break;
1074250199Sgrehan		}
1075250199Sgrehan	}
1076250199Sgrehan}
1077250199Sgrehan
1078250199Sgrehan/*
1079250199Sgrehan * Link up/down notification
1080250199Sgrehan */
1081250199Sgrehanvoid
1082250199Sgrehannetvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status)
1083250199Sgrehan{
1084250199Sgrehan	hn_softc_t *sc = device_get_softc(device_obj->device);
1085250199Sgrehan
1086250199Sgrehan	if (sc == NULL) {
1087250199Sgrehan		return;
1088250199Sgrehan	}
1089250199Sgrehan
1090250199Sgrehan	if (status == 1) {
1091250199Sgrehan		sc->hn_carrier = 1;
1092250199Sgrehan	} else {
1093250199Sgrehan		sc->hn_carrier = 0;
1094250199Sgrehan	}
1095250199Sgrehan}
1096250199Sgrehan
1097250199Sgrehan/*
1098250199Sgrehan * Append the specified data to the indicated mbuf chain,
1099250199Sgrehan * Extend the mbuf chain if the new data does not fit in
1100250199Sgrehan * existing space.
1101250199Sgrehan *
1102250199Sgrehan * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c.
1103250199Sgrehan * There should be an equivalent in the kernel mbuf code,
1104250199Sgrehan * but there does not appear to be one yet.
1105250199Sgrehan *
1106250199Sgrehan * Differs from m_append() in that additional mbufs are
1107250199Sgrehan * allocated with cluster size MJUMPAGESIZE, and filled
1108250199Sgrehan * accordingly.
1109250199Sgrehan *
1110250199Sgrehan * Return 1 if able to complete the job; otherwise 0.
1111250199Sgrehan */
1112250199Sgrehanstatic int
1113250199Sgrehanhv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
1114250199Sgrehan{
1115250199Sgrehan	struct mbuf *m, *n;
1116250199Sgrehan	int remainder, space;
1117250199Sgrehan
1118250199Sgrehan	for (m = m0; m->m_next != NULL; m = m->m_next)
1119250199Sgrehan		;
1120250199Sgrehan	remainder = len;
1121250199Sgrehan	space = M_TRAILINGSPACE(m);
1122250199Sgrehan	if (space > 0) {
1123250199Sgrehan		/*
1124250199Sgrehan		 * Copy into available space.
1125250199Sgrehan		 */
1126250199Sgrehan		if (space > remainder)
1127250199Sgrehan			space = remainder;
1128250199Sgrehan		bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
1129250199Sgrehan		m->m_len += space;
1130250199Sgrehan		cp += space;
1131250199Sgrehan		remainder -= space;
1132250199Sgrehan	}
1133250199Sgrehan	while (remainder > 0) {
1134250199Sgrehan		/*
1135250199Sgrehan		 * Allocate a new mbuf; could check space
1136250199Sgrehan		 * and allocate a cluster instead.
1137250199Sgrehan		 */
1138250199Sgrehan		n = m_getjcl(M_DONTWAIT, m->m_type, 0, MJUMPAGESIZE);
1139250199Sgrehan		if (n == NULL)
1140250199Sgrehan			break;
1141250199Sgrehan		n->m_len = min(MJUMPAGESIZE, remainder);
1142250199Sgrehan		bcopy(cp, mtod(n, caddr_t), n->m_len);
1143250199Sgrehan		cp += n->m_len;
1144250199Sgrehan		remainder -= n->m_len;
1145250199Sgrehan		m->m_next = n;
1146250199Sgrehan		m = n;
1147250199Sgrehan	}
1148250199Sgrehan	if (m0->m_flags & M_PKTHDR)
1149250199Sgrehan		m0->m_pkthdr.len += len - remainder;
1150250199Sgrehan
1151250199Sgrehan	return (remainder == 0);
1152250199Sgrehan}
1153250199Sgrehan
1154250199Sgrehan
1155250199Sgrehan/*
1156250199Sgrehan * Called when we receive a data packet from the "wire" on the
1157250199Sgrehan * specified device
1158250199Sgrehan *
1159250199Sgrehan * Note:  This is no longer used as a callback
1160250199Sgrehan */
1161250199Sgrehanint
1162285236Swhunetvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet,
1163285236Swhu    rndis_tcp_ip_csum_info *csum_info)
1164250199Sgrehan{
1165250199Sgrehan	hn_softc_t *sc = (hn_softc_t *)device_get_softc(device_ctx->device);
1166250199Sgrehan	struct mbuf *m_new;
1167257513Sdelphij	struct ifnet *ifp;
1168285236Swhu	device_t dev = device_ctx->device;
1169295789Ssephe	int size, do_lro = 0;
1170250199Sgrehan
1171250199Sgrehan	if (sc == NULL) {
1172250199Sgrehan		return (0); /* TODO: KYS how can this be! */
1173250199Sgrehan	}
1174257513Sdelphij
1175257513Sdelphij	ifp = sc->hn_ifp;
1176250199Sgrehan
1177250199Sgrehan	ifp = sc->arpcom.ac_ifp;
1178250199Sgrehan
1179250199Sgrehan	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1180250199Sgrehan		return (0);
1181250199Sgrehan	}
1182250199Sgrehan
1183250199Sgrehan	/*
1184250199Sgrehan	 * Bail out if packet contains more data than configured MTU.
1185250199Sgrehan	 */
1186250199Sgrehan	if (packet->tot_data_buf_len > (ifp->if_mtu + ETHER_HDR_LEN)) {
1187250199Sgrehan		return (0);
1188295789Ssephe	} else if (packet->tot_data_buf_len <= MHLEN) {
1189295789Ssephe		m_new = m_gethdr(M_NOWAIT, MT_DATA);
1190295789Ssephe		if (m_new == NULL)
1191295789Ssephe			return (0);
1192295789Ssephe		memcpy(mtod(m_new, void *), packet->data,
1193295789Ssephe		    packet->tot_data_buf_len);
1194295789Ssephe		m_new->m_pkthdr.len = m_new->m_len = packet->tot_data_buf_len;
1195295789Ssephe		sc->hn_small_pkts++;
1196295789Ssephe	} else {
1197295789Ssephe		/*
1198295789Ssephe		 * Get an mbuf with a cluster.  For packets 2K or less,
1199295789Ssephe		 * get a standard 2K cluster.  For anything larger, get a
1200295789Ssephe		 * 4K cluster.  Any buffers larger than 4K can cause problems
1201295789Ssephe		 * if looped around to the Hyper-V TX channel, so avoid them.
1202295789Ssephe		 */
1203295789Ssephe		size = MCLBYTES;
1204295789Ssephe		if (packet->tot_data_buf_len > MCLBYTES) {
1205295789Ssephe			/* 4096 */
1206295789Ssephe			size = MJUMPAGESIZE;
1207295789Ssephe		}
1208250199Sgrehan
1209295789Ssephe		m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
1210295789Ssephe		if (m_new == NULL) {
1211295789Ssephe			device_printf(dev, "alloc mbuf failed.\n");
1212295789Ssephe			return (0);
1213295789Ssephe		}
1214250199Sgrehan
1215295789Ssephe		hv_m_append(m_new, packet->tot_data_buf_len, packet->data);
1216250199Sgrehan	}
1217285236Swhu	m_new->m_pkthdr.rcvif = ifp;
1218250199Sgrehan
1219285236Swhu	/* receive side checksum offload */
1220285236Swhu	if (NULL != csum_info) {
1221285236Swhu		/* IP csum offload */
1222285236Swhu		if (csum_info->receive.ip_csum_succeeded) {
1223285236Swhu			m_new->m_pkthdr.csum_flags |=
1224285236Swhu			    (CSUM_IP_CHECKED | CSUM_IP_VALID);
1225295789Ssephe			sc->hn_csum_ip++;
1226285236Swhu		}
1227285236Swhu
1228285236Swhu		/* TCP csum offload */
1229285236Swhu		if (csum_info->receive.tcp_csum_succeeded) {
1230285236Swhu			m_new->m_pkthdr.csum_flags |=
1231285236Swhu			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1232285236Swhu			m_new->m_pkthdr.csum_data = 0xffff;
1233295789Ssephe			sc->hn_csum_tcp++;
1234285236Swhu		}
1235295789Ssephe
1236295789Ssephe		if (csum_info->receive.ip_csum_succeeded &&
1237295789Ssephe		    csum_info->receive.tcp_csum_succeeded)
1238295789Ssephe			do_lro = 1;
1239295789Ssephe	} else {
1240295789Ssephe		const struct ether_header *eh;
1241295789Ssephe		uint16_t etype;
1242295789Ssephe		int hoff;
1243295789Ssephe
1244295789Ssephe		hoff = sizeof(*eh);
1245295789Ssephe		if (m_new->m_len < hoff)
1246295789Ssephe			goto skip;
1247295789Ssephe		eh = mtod(m_new, struct ether_header *);
1248295789Ssephe		etype = ntohs(eh->ether_type);
1249295789Ssephe		if (etype == ETHERTYPE_VLAN) {
1250295789Ssephe			const struct ether_vlan_header *evl;
1251295789Ssephe
1252295789Ssephe			hoff = sizeof(*evl);
1253295789Ssephe			if (m_new->m_len < hoff)
1254295789Ssephe				goto skip;
1255295789Ssephe			evl = mtod(m_new, struct ether_vlan_header *);
1256295789Ssephe			etype = ntohs(evl->evl_proto);
1257295789Ssephe		}
1258295789Ssephe
1259295789Ssephe		if (etype == ETHERTYPE_IP) {
1260295789Ssephe			int pr;
1261295789Ssephe
1262295789Ssephe			pr = hn_check_iplen(m_new, hoff);
1263295789Ssephe			if (pr == IPPROTO_TCP) {
1264295789Ssephe				if (sc->hn_trust_hosttcp) {
1265295789Ssephe					sc->hn_csum_trusted++;
1266295789Ssephe					m_new->m_pkthdr.csum_flags |=
1267295789Ssephe					   (CSUM_IP_CHECKED | CSUM_IP_VALID |
1268295789Ssephe					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1269295789Ssephe					m_new->m_pkthdr.csum_data = 0xffff;
1270295789Ssephe				}
1271295789Ssephe				/* Rely on SW csum verification though... */
1272295789Ssephe				do_lro = 1;
1273295789Ssephe			}
1274295789Ssephe		}
1275250199Sgrehan	}
1276295789Ssepheskip:
1277250199Sgrehan	if ((packet->vlan_tci != 0) &&
1278285236Swhu	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) {
1279250199Sgrehan		m_new->m_pkthdr.ether_vtag = packet->vlan_tci;
1280250199Sgrehan		m_new->m_flags |= M_VLANTAG;
1281250199Sgrehan	}
1282250199Sgrehan
1283250199Sgrehan	/*
1284250199Sgrehan	 * Note:  Moved RX completion back to hv_nv_on_receive() so all
1285250199Sgrehan	 * messages (not just data messages) will trigger a response.
1286250199Sgrehan	 */
1287250199Sgrehan
1288250199Sgrehan	ifp->if_ipackets++;
1289250199Sgrehan
1290295789Ssephe	if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
1291295789Ssephe#if defined(INET) || defined(INET6)
1292295789Ssephe		struct lro_ctrl *lro = &sc->hn_lro;
1293295789Ssephe
1294295789Ssephe		if (lro->lro_cnt) {
1295295789Ssephe			sc->hn_lro_tried++;
1296295789Ssephe			if (tcp_lro_rx(lro, m_new, 0) == 0) {
1297295789Ssephe				/* DONE! */
1298295789Ssephe				return 0;
1299295789Ssephe			}
1300295789Ssephe		}
1301295789Ssephe#endif
1302295789Ssephe	}
1303295789Ssephe
1304250199Sgrehan	/* We're not holding the lock here, so don't release it */
1305250199Sgrehan	(*ifp->if_input)(ifp, m_new);
1306250199Sgrehan
1307250199Sgrehan	return (0);
1308250199Sgrehan}
1309250199Sgrehan
/*
 * Receive rollup: flush every entry currently on the softc's LRO
 * active list.
 *
 * Each entry is unlinked from lro_active and handed to tcp_lro_flush().
 * Compiles to an empty function when neither INET nor INET6 is defined.
 */
void
netvsc_recv_rollup(struct hv_device *device_ctx)
{
#if defined(INET) || defined(INET6)
	hn_softc_t *sc = device_get_softc(device_ctx->device);
	struct lro_ctrl *lc = &sc->hn_lro;

	for (;;) {
		struct lro_entry *ent = SLIST_FIRST(&lc->lro_active);

		if (ent == NULL)
			break;
		SLIST_REMOVE_HEAD(&lc->lro_active, next);
		tcp_lro_flush(lc, ent);
	}
#endif
}
1324295789Ssephe
1325250199Sgrehan/*
1326256363Sgrehan * Rules for using sc->temp_unusable:
1327256363Sgrehan * 1.  sc->temp_unusable can only be read or written while holding NV_LOCK()
1328256363Sgrehan * 2.  code reading sc->temp_unusable under NV_LOCK(), and finding
1329256363Sgrehan *     sc->temp_unusable set, must release NV_LOCK() and exit
1330256363Sgrehan * 3.  to retain exclusive control of the interface,
1331256363Sgrehan *     sc->temp_unusable must be set by code before releasing NV_LOCK()
1332256363Sgrehan * 4.  only code setting sc->temp_unusable can clear sc->temp_unusable
1333256363Sgrehan * 5.  code setting sc->temp_unusable must eventually clear sc->temp_unusable
1334256363Sgrehan */
1335256363Sgrehan
1336256363Sgrehan/*
1337250199Sgrehan * Standard ioctl entry point.  Called when the user wants to configure
1338250199Sgrehan * the interface.
1339250199Sgrehan */
1340250199Sgrehanstatic int
1341250199Sgrehanhn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1342250199Sgrehan{
1343250199Sgrehan	hn_softc_t *sc = ifp->if_softc;
1344250199Sgrehan	struct ifreq *ifr = (struct ifreq *)data;
1345285236Swhu#ifdef INET
1346285236Swhu	struct ifaddr *ifa = (struct ifaddr *)data;
1347285236Swhu#endif
1348250199Sgrehan	netvsc_device_info device_info;
1349250199Sgrehan	struct hv_device *hn_dev;
1350250199Sgrehan	int mask, error = 0;
1351256363Sgrehan	int retry_cnt = 500;
1352256363Sgrehan
1353250199Sgrehan	switch(cmd) {
1354250199Sgrehan
1355250199Sgrehan	case SIOCSIFADDR:
1356283100Sdelphij#ifdef INET
1357283100Sdelphij		if (ifa->ifa_addr->sa_family == AF_INET) {
1358283100Sdelphij			ifp->if_flags |= IFF_UP;
1359283100Sdelphij			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1360283100Sdelphij				hn_ifinit(sc);
1361283100Sdelphij			arp_ifinit(ifp, ifa);
1362283100Sdelphij		} else
1363283100Sdelphij#endif
1364250199Sgrehan		error = ether_ioctl(ifp, cmd, data);
1365250199Sgrehan		break;
1366250199Sgrehan	case SIOCSIFMTU:
1367250199Sgrehan		hn_dev = vmbus_get_devctx(sc->hn_dev);
1368250199Sgrehan
1369256363Sgrehan		/* Check MTU value change */
1370256363Sgrehan		if (ifp->if_mtu == ifr->ifr_mtu)
1371256363Sgrehan			break;
1372250199Sgrehan
1373250199Sgrehan		if (ifr->ifr_mtu > NETVSC_MAX_CONFIGURABLE_MTU) {
1374250199Sgrehan			error = EINVAL;
1375250199Sgrehan			break;
1376250199Sgrehan		}
1377256363Sgrehan
1378250199Sgrehan		/* Obtain and record requested MTU */
1379250199Sgrehan		ifp->if_mtu = ifr->ifr_mtu;
1380295789Ssephe		/*
1381295789Ssephe		 * Make sure that LRO high watermark is still valid,
1382295789Ssephe		 * after MTU change (the 2*MTU limit).
1383295789Ssephe		 */
1384295789Ssephe		if (!HN_LRO_HIWAT_ISVALID(sc, sc->hn_lro_hiwat))
1385295789Ssephe			hn_set_lro_hiwat(sc, HN_LRO_HIWAT_MTULIM(ifp));
1386295789Ssephe
1387256363Sgrehan		do {
1388256363Sgrehan			NV_LOCK(sc);
1389256363Sgrehan			if (!sc->temp_unusable) {
1390256363Sgrehan				sc->temp_unusable = TRUE;
1391256363Sgrehan				retry_cnt = -1;
1392256363Sgrehan			}
1393256363Sgrehan			NV_UNLOCK(sc);
1394256363Sgrehan			if (retry_cnt > 0) {
1395256363Sgrehan				retry_cnt--;
1396256363Sgrehan				DELAY(5 * 1000);
1397256363Sgrehan			}
1398256363Sgrehan		} while (retry_cnt > 0);
1399250199Sgrehan
1400256363Sgrehan		if (retry_cnt == 0) {
1401256363Sgrehan			error = EINVAL;
1402256363Sgrehan			break;
1403256363Sgrehan		}
1404256363Sgrehan
1405256363Sgrehan		/* We must remove and add back the device to cause the new
1406250199Sgrehan		 * MTU to take effect.  This includes tearing down, but not
1407250199Sgrehan		 * deleting the channel, then bringing it back up.
1408250199Sgrehan		 */
1409250199Sgrehan		error = hv_rf_on_device_remove(hn_dev, HV_RF_NV_RETAIN_CHANNEL);
1410250199Sgrehan		if (error) {
1411256363Sgrehan			NV_LOCK(sc);
1412256363Sgrehan			sc->temp_unusable = FALSE;
1413250199Sgrehan			NV_UNLOCK(sc);
1414250199Sgrehan			break;
1415250199Sgrehan		}
1416250199Sgrehan		error = hv_rf_on_device_add(hn_dev, &device_info);
1417250199Sgrehan		if (error) {
1418256363Sgrehan			NV_LOCK(sc);
1419256363Sgrehan			sc->temp_unusable = FALSE;
1420250199Sgrehan			NV_UNLOCK(sc);
1421250199Sgrehan			break;
1422250199Sgrehan		}
1423250199Sgrehan
1424295948Ssephe		sc->hn_tx_chimney_max = sc->net_dev->send_section_size;
1425295948Ssephe		if (sc->hn_tx_chimney_size > sc->hn_tx_chimney_max)
1426295948Ssephe			sc->hn_tx_chimney_size = sc->hn_tx_chimney_max;
1427250199Sgrehan		hn_ifinit_locked(sc);
1428250199Sgrehan
1429256363Sgrehan		NV_LOCK(sc);
1430256363Sgrehan		sc->temp_unusable = FALSE;
1431250199Sgrehan		NV_UNLOCK(sc);
1432250199Sgrehan		break;
1433250199Sgrehan	case SIOCSIFFLAGS:
1434256363Sgrehan		do {
1435256363Sgrehan                       NV_LOCK(sc);
1436256363Sgrehan                       if (!sc->temp_unusable) {
1437256363Sgrehan                               sc->temp_unusable = TRUE;
1438256363Sgrehan                               retry_cnt = -1;
1439256363Sgrehan                       }
1440256363Sgrehan                       NV_UNLOCK(sc);
1441256363Sgrehan                       if (retry_cnt > 0) {
1442256363Sgrehan                      	        retry_cnt--;
1443256363Sgrehan                        	DELAY(5 * 1000);
1444256363Sgrehan                       }
1445256363Sgrehan                } while (retry_cnt > 0);
1446256363Sgrehan
1447256363Sgrehan                if (retry_cnt == 0) {
1448256363Sgrehan                       error = EINVAL;
1449256363Sgrehan                       break;
1450256363Sgrehan                }
1451256363Sgrehan
1452250199Sgrehan		if (ifp->if_flags & IFF_UP) {
1453250199Sgrehan			/*
1454250199Sgrehan			 * If only the state of the PROMISC flag changed,
1455250199Sgrehan			 * then just use the 'set promisc mode' command
1456250199Sgrehan			 * instead of reinitializing the entire NIC. Doing
1457250199Sgrehan			 * a full re-init means reloading the firmware and
1458250199Sgrehan			 * waiting for it to start up, which may take a
1459250199Sgrehan			 * second or two.
1460250199Sgrehan			 */
1461250199Sgrehan#ifdef notyet
1462250199Sgrehan			/* Fixme:  Promiscuous mode? */
1463250199Sgrehan			if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
1464250199Sgrehan			    ifp->if_flags & IFF_PROMISC &&
1465250199Sgrehan			    !(sc->hn_if_flags & IFF_PROMISC)) {
1466250199Sgrehan				/* do something here for Hyper-V */
1467250199Sgrehan			} else if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
1468256363Sgrehan			    !(ifp->if_flags & IFF_PROMISC) &&
1469256363Sgrehan			    sc->hn_if_flags & IFF_PROMISC) {
1470250199Sgrehan				/* do something here for Hyper-V */
1471250199Sgrehan			} else
1472250199Sgrehan#endif
1473250199Sgrehan				hn_ifinit_locked(sc);
1474250199Sgrehan		} else {
1475250199Sgrehan			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1476250199Sgrehan				hn_stop(sc);
1477250199Sgrehan			}
1478250199Sgrehan		}
1479256363Sgrehan		NV_LOCK(sc);
1480256363Sgrehan		sc->temp_unusable = FALSE;
1481256363Sgrehan		NV_UNLOCK(sc);
1482250199Sgrehan		sc->hn_if_flags = ifp->if_flags;
1483250199Sgrehan		error = 0;
1484250199Sgrehan		break;
1485250199Sgrehan	case SIOCSIFCAP:
1486250199Sgrehan		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1487285236Swhu		if (mask & IFCAP_TXCSUM) {
1488285236Swhu			if (IFCAP_TXCSUM & ifp->if_capenable) {
1489285236Swhu				ifp->if_capenable &= ~IFCAP_TXCSUM;
1490285236Swhu				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
1491250199Sgrehan			} else {
1492285236Swhu				ifp->if_capenable |= IFCAP_TXCSUM;
1493285928Swhu				/*
1494285928Swhu				 * Only enable UDP checksum offloading on
1495285928Swhu				 * Windows Server 2012R2 or later releases.
1496285928Swhu				 */
1497285928Swhu				if (hv_vmbus_protocal_version >=
1498285928Swhu				    HV_VMBUS_VERSION_WIN8_1) {
1499285928Swhu					ifp->if_hwassist |=
1500285928Swhu					    (CSUM_TCP | CSUM_UDP);
1501285928Swhu				} else {
1502285928Swhu					ifp->if_hwassist |= CSUM_TCP;
1503285928Swhu				}
1504250199Sgrehan			}
1505250199Sgrehan		}
1506285236Swhu
1507285236Swhu		if (mask & IFCAP_RXCSUM) {
1508285236Swhu			if (IFCAP_RXCSUM & ifp->if_capenable) {
1509285236Swhu				ifp->if_capenable &= ~IFCAP_RXCSUM;
1510285236Swhu			} else {
1511285236Swhu				ifp->if_capenable |= IFCAP_RXCSUM;
1512285236Swhu			}
1513285236Swhu		}
1514295789Ssephe		if (mask & IFCAP_LRO)
1515295789Ssephe			ifp->if_capenable ^= IFCAP_LRO;
1516285236Swhu
1517285236Swhu		if (mask & IFCAP_TSO4) {
1518285236Swhu			ifp->if_capenable ^= IFCAP_TSO4;
1519285236Swhu			ifp->if_hwassist ^= CSUM_IP_TSO;
1520285236Swhu		}
1521285236Swhu
1522285236Swhu		if (mask & IFCAP_TSO6) {
1523285236Swhu			ifp->if_capenable ^= IFCAP_TSO6;
1524285236Swhu			ifp->if_hwassist ^= CSUM_IP6_TSO;
1525285236Swhu		}
1526285236Swhu
1527250199Sgrehan		error = 0;
1528250199Sgrehan		break;
1529250199Sgrehan	case SIOCADDMULTI:
1530250199Sgrehan	case SIOCDELMULTI:
1531250199Sgrehan#ifdef notyet
1532250199Sgrehan		/* Fixme:  Multicast mode? */
1533250199Sgrehan		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1534250199Sgrehan			NV_LOCK(sc);
1535250199Sgrehan			netvsc_setmulti(sc);
1536250199Sgrehan			NV_UNLOCK(sc);
1537250199Sgrehan			error = 0;
1538250199Sgrehan		}
1539250199Sgrehan#endif
1540295789Ssephe		error = EINVAL;
1541295789Ssephe		break;
1542250199Sgrehan	case SIOCSIFMEDIA:
1543250199Sgrehan	case SIOCGIFMEDIA:
1544295789Ssephe		error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
1545250199Sgrehan		break;
1546250199Sgrehan	default:
1547250199Sgrehan		error = ether_ioctl(ifp, cmd, data);
1548250199Sgrehan		break;
1549250199Sgrehan	}
1550250199Sgrehan
1551250199Sgrehan	return (error);
1552250199Sgrehan}
1553250199Sgrehan
1554250199Sgrehan/*
1555250199Sgrehan *
1556250199Sgrehan */
1557250199Sgrehanstatic void
1558250199Sgrehanhn_stop(hn_softc_t *sc)
1559250199Sgrehan{
1560250199Sgrehan	struct ifnet *ifp;
1561250199Sgrehan	int ret;
1562250199Sgrehan	struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
1563250199Sgrehan
1564250199Sgrehan	ifp = sc->hn_ifp;
1565250199Sgrehan
1566263853Sdelphij	if (bootverbose)
1567263853Sdelphij		printf(" Closing Device ...\n");
1568250199Sgrehan
1569250199Sgrehan	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1570283100Sdelphij	if_link_state_change(ifp, LINK_STATE_DOWN);
1571250199Sgrehan	sc->hn_initdone = 0;
1572250199Sgrehan
1573250199Sgrehan	ret = hv_rf_on_close(device_ctx);
1574250199Sgrehan}
1575250199Sgrehan
1576250199Sgrehan/*
1577250199Sgrehan * FreeBSD transmit entry point
1578250199Sgrehan */
1579250199Sgrehanstatic void
1580250199Sgrehanhn_start(struct ifnet *ifp)
1581250199Sgrehan{
1582250199Sgrehan	hn_softc_t *sc;
1583250199Sgrehan
1584250199Sgrehan	sc = ifp->if_softc;
1585250199Sgrehan	NV_LOCK(sc);
1586256363Sgrehan	if (sc->temp_unusable) {
1587256363Sgrehan		NV_UNLOCK(sc);
1588256363Sgrehan		return;
1589256363Sgrehan	}
1590250199Sgrehan	hn_start_locked(ifp);
1591250199Sgrehan	NV_UNLOCK(sc);
1592250199Sgrehan}
1593250199Sgrehan
1594250199Sgrehan/*
1595250199Sgrehan *
1596250199Sgrehan */
1597250199Sgrehanstatic void
1598250199Sgrehanhn_ifinit_locked(hn_softc_t *sc)
1599250199Sgrehan{
1600250199Sgrehan	struct ifnet *ifp;
1601250199Sgrehan	struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
1602250199Sgrehan	int ret;
1603250199Sgrehan
1604250199Sgrehan	ifp = sc->hn_ifp;
1605250199Sgrehan
1606250199Sgrehan	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1607250199Sgrehan		return;
1608250199Sgrehan	}
1609250199Sgrehan
1610250199Sgrehan	hv_promisc_mode = 1;
1611250199Sgrehan
1612250199Sgrehan	ret = hv_rf_on_open(device_ctx);
1613250199Sgrehan	if (ret != 0) {
1614250199Sgrehan		return;
1615250199Sgrehan	} else {
1616250199Sgrehan		sc->hn_initdone = 1;
1617250199Sgrehan	}
1618250199Sgrehan	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1619250199Sgrehan	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1620283100Sdelphij	if_link_state_change(ifp, LINK_STATE_UP);
1621250199Sgrehan}
1622250199Sgrehan
1623250199Sgrehan/*
1624250199Sgrehan *
1625250199Sgrehan */
1626250199Sgrehanstatic void
1627250199Sgrehanhn_ifinit(void *xsc)
1628250199Sgrehan{
1629250199Sgrehan	hn_softc_t *sc = xsc;
1630250199Sgrehan
1631250199Sgrehan	NV_LOCK(sc);
1632256363Sgrehan	if (sc->temp_unusable) {
1633256363Sgrehan		NV_UNLOCK(sc);
1634256363Sgrehan		return;
1635256363Sgrehan	}
1636256363Sgrehan	sc->temp_unusable = TRUE;
1637256363Sgrehan	NV_UNLOCK(sc);
1638256363Sgrehan
1639250199Sgrehan	hn_ifinit_locked(sc);
1640256363Sgrehan
1641256363Sgrehan	NV_LOCK(sc);
1642256363Sgrehan	sc->temp_unusable = FALSE;
1643250199Sgrehan	NV_UNLOCK(sc);
1644250199Sgrehan}
1645250199Sgrehan
#ifdef LATER
/*
 * Watchdog handler; only compiled when LATER is defined.
 *
 * Logs the timeout, re-runs hn_ifinit() on the softc and bumps the
 * interface's output error counter.
 */
static void
hn_watchdog(struct ifnet *ifp)
{
	hn_softc_t *sc = ifp->if_softc;

	printf("hn%d: watchdog timeout -- resetting\n", sc->hn_unit);
	hn_ifinit(sc);    /*???*/
	ifp->if_oerrors++;
}
#endif	/* LATER */
1661250199Sgrehan
#ifdef HN_LRO_HIWAT
/*
 * Sysctl handler for the LRO high watermark of the softc passed as arg1.
 *
 * Reads report sc->hn_lro_hiwat.  A write is rejected with EINVAL unless
 * HN_LRO_HIWAT_ISVALID() accepts the new value; an accepted value that
 * differs from the current one is applied through hn_set_lro_hiwat().
 * Returns 0 on success or the error from sysctl_handle_int().
 */
static int
hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	int new_hiwat = sc->hn_lro_hiwat;
	int error;

	error = sysctl_handle_int(oidp, &new_hiwat, 0, req);
	if (error != 0)
		return (error);

	/* Read-only request: nothing to apply. */
	if (req->newptr == NULL)
		return (0);

	if (!HN_LRO_HIWAT_ISVALID(sc, new_hiwat))
		return (EINVAL);

	if (new_hiwat != sc->hn_lro_hiwat)
		hn_set_lro_hiwat(sc, new_hiwat);

	return (0);
}
#endif	/* HN_LRO_HIWAT */
1682295789Ssephe
1683295789Ssephestatic int
1684295948Ssephehn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS)
1685295948Ssephe{
1686295948Ssephe	struct hn_softc *sc = arg1;
1687295948Ssephe	int chimney_size, error;
1688295948Ssephe
1689295948Ssephe	chimney_size = sc->hn_tx_chimney_size;
1690295948Ssephe	error = sysctl_handle_int(oidp, &chimney_size, 0, req);
1691295948Ssephe	if (error || req->newptr == NULL)
1692295948Ssephe		return error;
1693295948Ssephe
1694295948Ssephe	if (chimney_size > sc->hn_tx_chimney_max || chimney_size <= 0)
1695295948Ssephe		return EINVAL;
1696295948Ssephe
1697295948Ssephe	if (sc->hn_tx_chimney_size != chimney_size)
1698295948Ssephe		sc->hn_tx_chimney_size = chimney_size;
1699295948Ssephe	return 0;
1700295948Ssephe}
1701295948Ssephe
1702295948Ssephestatic int
1703295789Ssephehn_check_iplen(const struct mbuf *m, int hoff)
1704295789Ssephe{
1705295789Ssephe	const struct ip *ip;
1706295789Ssephe	int len, iphlen, iplen;
1707295789Ssephe	const struct tcphdr *th;
1708295789Ssephe	int thoff;				/* TCP data offset */
1709295789Ssephe
1710295789Ssephe	len = hoff + sizeof(struct ip);
1711295789Ssephe
1712295789Ssephe	/* The packet must be at least the size of an IP header. */
1713295789Ssephe	if (m->m_pkthdr.len < len)
1714295789Ssephe		return IPPROTO_DONE;
1715295789Ssephe
1716295789Ssephe	/* The fixed IP header must reside completely in the first mbuf. */
1717295789Ssephe	if (m->m_len < len)
1718295789Ssephe		return IPPROTO_DONE;
1719295789Ssephe
1720295789Ssephe	ip = mtodo(m, hoff);
1721295789Ssephe
1722295789Ssephe	/* Bound check the packet's stated IP header length. */
1723295789Ssephe	iphlen = ip->ip_hl << 2;
1724295789Ssephe	if (iphlen < sizeof(struct ip))		/* minimum header length */
1725295789Ssephe		return IPPROTO_DONE;
1726295789Ssephe
1727295789Ssephe	/* The full IP header must reside completely in the one mbuf. */
1728295789Ssephe	if (m->m_len < hoff + iphlen)
1729295789Ssephe		return IPPROTO_DONE;
1730295789Ssephe
1731295789Ssephe	iplen = ntohs(ip->ip_len);
1732295789Ssephe
1733295789Ssephe	/*
1734295789Ssephe	 * Check that the amount of data in the buffers is as
1735295789Ssephe	 * at least much as the IP header would have us expect.
1736295789Ssephe	 */
1737295789Ssephe	if (m->m_pkthdr.len < hoff + iplen)
1738295789Ssephe		return IPPROTO_DONE;
1739295789Ssephe
1740295789Ssephe	/*
1741295789Ssephe	 * Ignore IP fragments.
1742295789Ssephe	 */
1743295789Ssephe	if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
1744295789Ssephe		return IPPROTO_DONE;
1745295789Ssephe
1746295789Ssephe	/*
1747295789Ssephe	 * The TCP/IP or UDP/IP header must be entirely contained within
1748295789Ssephe	 * the first fragment of a packet.
1749295789Ssephe	 */
1750295789Ssephe	switch (ip->ip_p) {
1751295789Ssephe	case IPPROTO_TCP:
1752295789Ssephe		if (iplen < iphlen + sizeof(struct tcphdr))
1753295789Ssephe			return IPPROTO_DONE;
1754295789Ssephe		if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
1755295789Ssephe			return IPPROTO_DONE;
1756295789Ssephe		th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
1757295789Ssephe		thoff = th->th_off << 2;
1758295789Ssephe		if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
1759295789Ssephe			return IPPROTO_DONE;
1760295789Ssephe		if (m->m_len < hoff + iphlen + thoff)
1761295789Ssephe			return IPPROTO_DONE;
1762295789Ssephe		break;
1763295789Ssephe	case IPPROTO_UDP:
1764295789Ssephe		if (iplen < iphlen + sizeof(struct udphdr))
1765295789Ssephe			return IPPROTO_DONE;
1766295789Ssephe		if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
1767295789Ssephe			return IPPROTO_DONE;
1768295789Ssephe		break;
1769295789Ssephe	default:
1770295789Ssephe		if (iplen < iphlen)
1771295789Ssephe			return IPPROTO_DONE;
1772295789Ssephe		break;
1773295789Ssephe	}
1774295789Ssephe	return ip->ip_p;
1775295789Ssephe}
1776295789Ssephe
1777295948Ssephestatic void
1778295948Ssephehn_dma_map_paddr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
1779295948Ssephe{
1780295948Ssephe	bus_addr_t *paddr = arg;
1781295948Ssephe
1782295948Ssephe	if (error)
1783295948Ssephe		return;
1784295948Ssephe
1785295948Ssephe	KASSERT(nseg == 1, ("too many segments %d!", nseg));
1786295948Ssephe	*paddr = segs->ds_addr;
1787295948Ssephe}
1788295948Ssephe
1789295948Ssephestatic int
1790295948Ssephehn_create_tx_ring(struct hn_softc *sc)
1791295948Ssephe{
1792295948Ssephe	bus_dma_tag_t parent_dtag;
1793295948Ssephe	int error, i;
1794295948Ssephe
1795295948Ssephe	sc->hn_txdesc_cnt = HN_TX_DESC_CNT;
1796295948Ssephe	sc->hn_txdesc = malloc(sizeof(struct hn_txdesc) * sc->hn_txdesc_cnt,
1797295948Ssephe	    M_NETVSC, M_WAITOK | M_ZERO);
1798295948Ssephe	SLIST_INIT(&sc->hn_txlist);
1799295948Ssephe	mtx_init(&sc->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
1800295948Ssephe
1801295948Ssephe	parent_dtag = bus_get_dma_tag(sc->hn_dev);
1802295948Ssephe
1803295948Ssephe	/* DMA tag for RNDIS messages. */
1804295948Ssephe	error = bus_dma_tag_create(parent_dtag, /* parent */
1805295948Ssephe	    HN_RNDIS_MSG_ALIGN,		/* alignment */
1806295948Ssephe	    HN_RNDIS_MSG_BOUNDARY,	/* boundary */
1807295948Ssephe	    BUS_SPACE_MAXADDR,		/* lowaddr */
1808295948Ssephe	    BUS_SPACE_MAXADDR,		/* highaddr */
1809295948Ssephe	    NULL, NULL,			/* filter, filterarg */
1810295948Ssephe	    HN_RNDIS_MSG_LEN,		/* maxsize */
1811295948Ssephe	    1,				/* nsegments */
1812295948Ssephe	    HN_RNDIS_MSG_LEN,		/* maxsegsize */
1813295948Ssephe	    0,				/* flags */
1814295948Ssephe	    NULL,			/* lockfunc */
1815295948Ssephe	    NULL,			/* lockfuncarg */
1816295948Ssephe	    &sc->hn_tx_rndis_dtag);
1817295948Ssephe	if (error) {
1818295948Ssephe		device_printf(sc->hn_dev, "failed to create rndis dmatag\n");
1819295948Ssephe		return error;
1820295948Ssephe	}
1821295948Ssephe
1822295948Ssephe	/* DMA tag for data. */
1823295948Ssephe	error = bus_dma_tag_create(parent_dtag, /* parent */
1824295948Ssephe	    1,				/* alignment */
1825295948Ssephe	    HN_TX_DATA_BOUNDARY,	/* boundary */
1826295948Ssephe	    BUS_SPACE_MAXADDR,		/* lowaddr */
1827295948Ssephe	    BUS_SPACE_MAXADDR,		/* highaddr */
1828295948Ssephe	    NULL, NULL,			/* filter, filterarg */
1829295948Ssephe	    HN_TX_DATA_MAXSIZE,		/* maxsize */
1830295948Ssephe	    HN_TX_DATA_SEGCNT_MAX,	/* nsegments */
1831295948Ssephe	    HN_TX_DATA_SEGSIZE,		/* maxsegsize */
1832295948Ssephe	    0,				/* flags */
1833295948Ssephe	    NULL,			/* lockfunc */
1834295948Ssephe	    NULL,			/* lockfuncarg */
1835295948Ssephe	    &sc->hn_tx_data_dtag);
1836295948Ssephe	if (error) {
1837295948Ssephe		device_printf(sc->hn_dev, "failed to create data dmatag\n");
1838295948Ssephe		return error;
1839295948Ssephe	}
1840295948Ssephe
1841295948Ssephe	for (i = 0; i < sc->hn_txdesc_cnt; ++i) {
1842295948Ssephe		struct hn_txdesc *txd = &sc->hn_txdesc[i];
1843295948Ssephe
1844295948Ssephe		txd->sc = sc;
1845295948Ssephe
1846295948Ssephe		/*
1847295948Ssephe		 * Allocate and load RNDIS messages.
1848295948Ssephe		 */
1849295948Ssephe        	error = bus_dmamem_alloc(sc->hn_tx_rndis_dtag,
1850295948Ssephe		    (void **)&txd->rndis_msg,
1851295948Ssephe		    BUS_DMA_WAITOK | BUS_DMA_COHERENT,
1852295948Ssephe		    &txd->rndis_msg_dmap);
1853295948Ssephe		if (error) {
1854295948Ssephe			device_printf(sc->hn_dev,
1855295948Ssephe			    "failed to allocate rndis_msg, %d\n", i);
1856295948Ssephe			return error;
1857295948Ssephe		}
1858295948Ssephe
1859295948Ssephe		error = bus_dmamap_load(sc->hn_tx_rndis_dtag,
1860295948Ssephe		    txd->rndis_msg_dmap,
1861295948Ssephe		    txd->rndis_msg, HN_RNDIS_MSG_LEN,
1862295948Ssephe		    hn_dma_map_paddr, &txd->rndis_msg_paddr,
1863295948Ssephe		    BUS_DMA_NOWAIT);
1864295948Ssephe		if (error) {
1865295948Ssephe			device_printf(sc->hn_dev,
1866295948Ssephe			    "failed to load rndis_msg, %d\n", i);
1867295948Ssephe			bus_dmamem_free(sc->hn_tx_rndis_dtag,
1868295948Ssephe			    txd->rndis_msg, txd->rndis_msg_dmap);
1869295948Ssephe			return error;
1870295948Ssephe		}
1871295948Ssephe
1872295948Ssephe		/* DMA map for TX data. */
1873295948Ssephe		error = bus_dmamap_create(sc->hn_tx_data_dtag, 0,
1874295948Ssephe		    &txd->data_dmap);
1875295948Ssephe		if (error) {
1876295948Ssephe			device_printf(sc->hn_dev,
1877295948Ssephe			    "failed to allocate tx data dmamap\n");
1878295948Ssephe			bus_dmamap_unload(sc->hn_tx_rndis_dtag,
1879295948Ssephe			    txd->rndis_msg_dmap);
1880295948Ssephe			bus_dmamem_free(sc->hn_tx_rndis_dtag,
1881295948Ssephe			    txd->rndis_msg, txd->rndis_msg_dmap);
1882295948Ssephe			return error;
1883295948Ssephe		}
1884295948Ssephe
1885295948Ssephe		/* All set, put it to list */
1886295948Ssephe		txd->flags |= HN_TXD_FLAG_ONLIST;
1887295948Ssephe		SLIST_INSERT_HEAD(&sc->hn_txlist, txd, link);
1888295948Ssephe	}
1889295948Ssephe	sc->hn_txdesc_avail = sc->hn_txdesc_cnt;
1890295948Ssephe
1891295948Ssephe	return 0;
1892295948Ssephe}
1893295948Ssephe
1894295948Ssephestatic void
1895295948Ssephehn_destroy_tx_ring(struct hn_softc *sc)
1896295948Ssephe{
1897295948Ssephe	struct hn_txdesc *txd;
1898295948Ssephe
1899295948Ssephe	while ((txd = SLIST_FIRST(&sc->hn_txlist)) != NULL) {
1900295948Ssephe		KASSERT(txd->m == NULL, ("still has mbuf installed"));
1901295948Ssephe		KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
1902295948Ssephe		    ("still dma mapped"));
1903295948Ssephe		SLIST_REMOVE_HEAD(&sc->hn_txlist, link);
1904295948Ssephe
1905295948Ssephe		bus_dmamap_unload(sc->hn_tx_rndis_dtag,
1906295948Ssephe		    txd->rndis_msg_dmap);
1907295948Ssephe		bus_dmamem_free(sc->hn_tx_rndis_dtag,
1908295948Ssephe		    txd->rndis_msg, txd->rndis_msg_dmap);
1909295948Ssephe
1910295948Ssephe		bus_dmamap_destroy(sc->hn_tx_data_dtag, txd->data_dmap);
1911295948Ssephe	}
1912295948Ssephe
1913295948Ssephe	if (sc->hn_tx_data_dtag != NULL)
1914295948Ssephe		bus_dma_tag_destroy(sc->hn_tx_data_dtag);
1915295948Ssephe	if (sc->hn_tx_rndis_dtag != NULL)
1916295948Ssephe		bus_dma_tag_destroy(sc->hn_tx_rndis_dtag);
1917295948Ssephe	free(sc->hn_txdesc, M_NETVSC);
1918295948Ssephe	mtx_destroy(&sc->hn_txlist_spin);
1919295948Ssephe}
1920295948Ssephe
1921250199Sgrehanstatic device_method_t netvsc_methods[] = {
1922250199Sgrehan        /* Device interface */
1923250199Sgrehan        DEVMETHOD(device_probe,         netvsc_probe),
1924250199Sgrehan        DEVMETHOD(device_attach,        netvsc_attach),
1925250199Sgrehan        DEVMETHOD(device_detach,        netvsc_detach),
1926250199Sgrehan        DEVMETHOD(device_shutdown,      netvsc_shutdown),
1927250199Sgrehan
1928250199Sgrehan        { 0, 0 }
1929250199Sgrehan};
1930250199Sgrehan
1931250199Sgrehanstatic driver_t netvsc_driver = {
1932250199Sgrehan        NETVSC_DEVNAME,
1933250199Sgrehan        netvsc_methods,
1934250199Sgrehan        sizeof(hn_softc_t)
1935250199Sgrehan};
1936250199Sgrehan
/* Device class handle passed to DRIVER_MODULE() below. */
static devclass_t netvsc_devclass;

/*
 * Module glue: register netvsc_driver as module "hn" on the "vmbus" bus
 * (no event handler or argument), declare the module's version as 1 and
 * declare its dependency on the vmbus module.
 */
DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0);
MODULE_VERSION(hn, 1);
MODULE_DEPEND(hn, vmbus, 1, 1, 1);
1942