hv_netvsc_drv_freebsd.c revision 295789
190075Sobrien/*- 290075Sobrien * Copyright (c) 2010-2012 Citrix Inc. 3169699Skan * Copyright (c) 2009-2012 Microsoft Corp. 490075Sobrien * Copyright (c) 2012 NetApp Inc. 590075Sobrien * All rights reserved. 690075Sobrien * 790075Sobrien * Redistribution and use in source and binary forms, with or without 890075Sobrien * modification, are permitted provided that the following conditions 990075Sobrien * are met: 1090075Sobrien * 1. Redistributions of source code must retain the above copyright 1190075Sobrien * notice unmodified, this list of conditions, and the following 1290075Sobrien * disclaimer. 1390075Sobrien * 2. Redistributions in binary form must reproduce the above copyright 1490075Sobrien * notice, this list of conditions and the following disclaimer in the 1590075Sobrien * documentation and/or other materials provided with the distribution. 1690075Sobrien * 1790075Sobrien * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1890075Sobrien * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19169699Skan * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20169699Skan * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 2190075Sobrien * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2290075Sobrien * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2390075Sobrien * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2490075Sobrien * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2590075Sobrien * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2690075Sobrien * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27132729Skan */ 2890075Sobrien 2990075Sobrien/*- 30169699Skan * Copyright (c) 2004-2006 Kip Macy 3190075Sobrien * All rights reserved. 3290075Sobrien * 3390075Sobrien * Redistribution and use in source and binary forms, with or without 34169699Skan * modification, are permitted provided that the following conditions 35169699Skan * are met: 36169699Skan * 1. Redistributions of source code must retain the above copyright 37169699Skan * notice, this list of conditions and the following disclaimer. 3890075Sobrien * 2. Redistributions in binary form must reproduce the above copyright 3990075Sobrien * notice, this list of conditions and the following disclaimer in the 4090075Sobrien * documentation and/or other materials provided with the distribution. 41117406Skan * 4290075Sobrien * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 4390075Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 4490075Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 4590075Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 4690075Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 4790075Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 4890075Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 4990075Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 5090075Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 5190075Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 5290075Sobrien * SUCH DAMAGE. 5390075Sobrien */ 5490075Sobrien 5590075Sobrien#include <sys/cdefs.h> 56132729Skan__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c 295789 2016-02-19 02:03:14Z sephe $"); 5790075Sobrien 5890075Sobrien#include "opt_inet6.h" 5990075Sobrien#include "opt_inet.h" 6090075Sobrien 6190075Sobrien#include <sys/param.h> 6290075Sobrien#include <sys/systm.h> 6390075Sobrien#include <sys/sockio.h> 6490075Sobrien#include <sys/mbuf.h> 6590075Sobrien#include <sys/malloc.h> 6690075Sobrien#include <sys/module.h> 6790075Sobrien#include <sys/kernel.h> 6890075Sobrien#include <sys/socket.h> 6990075Sobrien#include <sys/queue.h> 7090075Sobrien#include <sys/lock.h> 7190075Sobrien#include <sys/sx.h> 7290075Sobrien#include <sys/sysctl.h> 73169699Skan 74169699Skan#include <net/if.h> 75169699Skan#include <net/if_arp.h> 7690075Sobrien#include <net/ethernet.h> 7790075Sobrien#include <net/if_dl.h> 7890075Sobrien#include <net/if_media.h> 7990075Sobrien 8090075Sobrien#include <net/bpf.h> 8190075Sobrien 8290075Sobrien#include <net/if_types.h> 8390075Sobrien#include <net/if_vlan_var.h> 8490075Sobrien#include <net/if.h> 85169699Skan 86169699Skan#include <netinet/in_systm.h> 87169699Skan#include <netinet/in.h> 88132729Skan#include <netinet/ip.h> 8990075Sobrien#include <netinet/if_ether.h> 9090075Sobrien#include <netinet/tcp.h> 9190075Sobrien#include <netinet/udp.h> 9290075Sobrien#include <netinet/ip6.h> 93117406Skan 94169699Skan#include <vm/vm.h> 9590075Sobrien#include <vm/vm_param.h> 9690075Sobrien#include <vm/vm_kern.h> 97169699Skan#include <vm/pmap.h> 98132729Skan 99132729Skan#include <machine/bus.h> 100132729Skan#include <machine/resource.h> 10190075Sobrien#include <machine/frame.h> 10290075Sobrien#include <machine/vmparam.h> 10390075Sobrien 10490075Sobrien#include <sys/bus.h> 10590075Sobrien#include <sys/rman.h> 10690075Sobrien#include <sys/mutex.h> 10790075Sobrien#include <sys/errno.h> 10890075Sobrien#include <sys/types.h> 10990075Sobrien#include <machine/atomic.h> 11090075Sobrien 11190075Sobrien#include <machine/intr_machdep.h> 11290075Sobrien 11390075Sobrien#include <machine/in_cksum.h> 11490075Sobrien 115169699Skan#include <dev/hyperv/include/hyperv.h> 116169699Skan#include "hv_net_vsc.h" 11790075Sobrien#include "hv_rndis.h" 11890075Sobrien#include "hv_rndis_filter.h" 119169699Skan 120169699Skan 12190075Sobrien/* Short for Hyper-V network interface */ 12290075Sobrien#define NETVSC_DEVNAME "hn" 12390075Sobrien 124169699Skan/* 12590075Sobrien * It looks like offset 0 of buf is reserved to hold the softc pointer. 12690075Sobrien * The sc pointer evidently not needed, and is not presently populated. 12790075Sobrien * The packet offset is where the netvsc_packet starts in the buffer. 12890075Sobrien */ 12990075Sobrien#define HV_NV_SC_PTR_OFFSET_IN_BUF 0 13090075Sobrien#define HV_NV_PACKET_OFFSET_IN_BUF 16 13190075Sobrien 13290075Sobrien/* 13390075Sobrien * A unified flag for all outbound check sum flags is useful, 13490075Sobrien * and it helps avoiding unnecessary check sum calculation in 13590075Sobrien * network forwarding scenario. 13690075Sobrien */ 13790075Sobrien#define HV_CSUM_FOR_OUTBOUND \ 13890075Sobrien (CSUM_IP|CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP|CSUM_IP_TSO| \ 13990075Sobrien CSUM_IP_ISCSI|CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP| \ 14090075Sobrien CSUM_IP6_TSO|CSUM_IP6_ISCSI) 14190075Sobrien 14290075Sobrien/* XXX move to netinet/tcp_lro.h */ 14390075Sobrien#define HN_LRO_HIWAT_MAX 65535 14490075Sobrien#define HN_LRO_HIWAT_DEF HN_LRO_HIWAT_MAX 14590075Sobrien/* YYY 2*MTU is a bit rough, but should be good enough. */ 14690075Sobrien#define HN_LRO_HIWAT_MTULIM(ifp) (2 * (ifp)->if_mtu) 14790075Sobrien#define HN_LRO_HIWAT_ISVALID(sc, hiwat) \ 148132729Skan ((hiwat) >= HN_LRO_HIWAT_MTULIM((sc)->hn_ifp) || \ 149132729Skan (hiwat) <= HN_LRO_HIWAT_MAX) 150132729Skan 151132729Skan/* 15290075Sobrien * Be aware that this sleepable mutex will exhibit WITNESS errors when 15390075Sobrien * certain TCP and ARP code paths are taken. This appears to be a 15490075Sobrien * well-known condition, as all other drivers checked use a sleeping 15590075Sobrien * mutex to protect their transmit paths. 15690075Sobrien * Also Be aware that mutexes do not play well with semaphores, and there 15790075Sobrien * is a conflicting semaphore in a certain channel code path. 15890075Sobrien */ 159169699Skan#define NV_LOCK_INIT(_sc, _name) \ 16090075Sobrien mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF) 16190075Sobrien#define NV_LOCK(_sc) mtx_lock(&(_sc)->hn_lock) 162169699Skan#define NV_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->hn_lock, MA_OWNED) 163169699Skan#define NV_UNLOCK(_sc) mtx_unlock(&(_sc)->hn_lock) 16490075Sobrien#define NV_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->hn_lock) 16590075Sobrien 16690075Sobrien 16790075Sobrien/* 16890075Sobrien * Globals 16990075Sobrien */ 17090075Sobrien 171117406Skanint hv_promisc_mode = 0; /* normal mode by default */ 17290075Sobrien 17390075Sobrien/* Trust tcp segements verification on host side. */ 174132729Skanstatic int hn_trust_hosttcp = 0; 17590075SobrienTUNABLE_INT("dev.hn.trust_hosttcp", &hn_trust_hosttcp); 17690075Sobrien 17790075Sobrien/* 17890075Sobrien * Forward declarations 17990075Sobrien */ 18090075Sobrienstatic void hn_stop(hn_softc_t *sc); 18190075Sobrienstatic void hn_ifinit_locked(hn_softc_t *sc); 18290075Sobrienstatic void hn_ifinit(void *xsc); 18390075Sobrienstatic int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); 18490075Sobrienstatic int hn_start_locked(struct ifnet *ifp); 18590075Sobrienstatic void hn_start(struct ifnet *ifp); 18690075Sobrienstatic int hn_ifmedia_upd(struct ifnet *ifp); 18790075Sobrienstatic void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); 188132729Skan#ifdef HN_LRO_HIWAT 189132729Skanstatic int hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS); 190132729Skan#endif 19190075Sobrienstatic int hn_check_iplen(const struct mbuf *, int); 19290075Sobrien 19390075Sobrienstatic __inline void 19490075Sobrienhn_set_lro_hiwat(struct hn_softc *sc, int hiwat) 19590075Sobrien{ 19690075Sobrien sc->hn_lro_hiwat = hiwat; 19790075Sobrien#ifdef HN_LRO_HIWAT 19890075Sobrien sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat; 19990075Sobrien#endif 20090075Sobrien} 20190075Sobrien 20290075Sobrien/* 20390075Sobrien * NetVsc get message transport protocol type 20490075Sobrien */ 20590075Sobrienstatic uint32_t get_transport_proto_type(struct mbuf *m_head) 20690075Sobrien{ 20790075Sobrien uint32_t ret_val = TRANSPORT_TYPE_NOT_IP; 208169699Skan uint16_t ether_type = 0; 209169699Skan int ether_len = 0; 210169699Skan struct ether_vlan_header *eh; 211117406Skan#ifdef INET 21290075Sobrien struct ip *iph; 213132729Skan#endif 214132729Skan#ifdef INET6 215132729Skan struct ip6_hdr *ip6; 216132729Skan#endif 217132729Skan 218132729Skan eh = mtod(m_head, struct ether_vlan_header*); 219132729Skan if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 220132729Skan ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 22190075Sobrien ether_type = eh->evl_proto; 22290075Sobrien } else { 22390075Sobrien ether_len = ETHER_HDR_LEN; 22490075Sobrien ether_type = eh->evl_encap_proto; 22590075Sobrien } 22690075Sobrien 227132729Skan switch (ntohs(ether_type)) { 228132729Skan#ifdef INET6 229132729Skan case ETHERTYPE_IPV6: 230132729Skan ip6 = (struct ip6_hdr *)(m_head->m_data + ether_len); 23190075Sobrien 23290075Sobrien if (IPPROTO_TCP == ip6->ip6_nxt) { 23390075Sobrien ret_val = TRANSPORT_TYPE_IPV6_TCP; 234132729Skan } else if (IPPROTO_UDP == ip6->ip6_nxt) { 235132729Skan ret_val = TRANSPORT_TYPE_IPV6_UDP; 236132729Skan } 237132729Skan break; 238132729Skan#endif 239132729Skan#ifdef INET 240132729Skan case ETHERTYPE_IP: 24190075Sobrien iph = (struct ip *)(m_head->m_data + ether_len); 24290075Sobrien 243117406Skan if (IPPROTO_TCP == iph->ip_p) { 244169699Skan ret_val = TRANSPORT_TYPE_IPV4_TCP; 245169699Skan } else if (IPPROTO_UDP == iph->ip_p) { 246169699Skan ret_val = TRANSPORT_TYPE_IPV4_UDP; 247117406Skan } 24890075Sobrien break; 24990075Sobrien#endif 25090075Sobrien default: 25190075Sobrien ret_val = TRANSPORT_TYPE_NOT_IP; 252117406Skan break; 25390075Sobrien } 25490075Sobrien 25590075Sobrien return (ret_val); 25690075Sobrien} 25790075Sobrien 25890075Sobrienstatic int 25990075Sobrienhn_ifmedia_upd(struct ifnet *ifp __unused) 26090075Sobrien{ 26190075Sobrien 26290075Sobrien return EOPNOTSUPP; 26390075Sobrien} 26490075Sobrien 26590075Sobrienstatic void 26690075Sobrienhn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 26790075Sobrien{ 26890075Sobrien struct hn_softc *sc = ifp->if_softc; 269117406Skan 27090075Sobrien ifmr->ifm_status = IFM_AVALID; 27190075Sobrien ifmr->ifm_active = IFM_ETHER; 27290075Sobrien 27390075Sobrien if (!sc->hn_carrier) { 27490075Sobrien ifmr->ifm_active |= IFM_NONE; 275169699Skan return; 276169699Skan } 277169699Skan ifmr->ifm_status |= IFM_ACTIVE; 27890075Sobrien ifmr->ifm_active |= IFM_10G_T | IFM_FDX; 279169699Skan} 28090075Sobrien 28190075Sobrien/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ 28290075Sobrienstatic const hv_guid g_net_vsc_device_type = { 28390075Sobrien .data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 28490075Sobrien 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} 285132729Skan}; 28690075Sobrien 287169699Skan/* 288169699Skan * Standard probe entry point. 289169699Skan * 290169699Skan */ 291169699Skanstatic int 292169699Skannetvsc_probe(device_t dev) 293132729Skan{ 294169699Skan const char *p; 295169699Skan 296132729Skan p = vmbus_get_type(dev); 297169699Skan if (!memcmp(p, &g_net_vsc_device_type.data, sizeof(hv_guid))) { 298169699Skan device_set_desc(dev, "Synthetic Network Interface"); 299169699Skan if (bootverbose) 300169699Skan printf("Netvsc probe... DONE \n"); 301169699Skan 30290075Sobrien return (BUS_PROBE_DEFAULT); 303169699Skan } 30490075Sobrien 305132729Skan return (ENXIO); 306169699Skan} 307169699Skan 308169699Skan/* 309132729Skan * Standard attach entry point. 310132729Skan * 31190075Sobrien * Called when the driver is loaded. It allocates needed resources, 312132729Skan * and initializes the "hardware" and software. 31390075Sobrien */ 314132729Skanstatic int 31590075Sobriennetvsc_attach(device_t dev) 316132729Skan{ 317132729Skan struct hv_device *device_ctx = vmbus_get_devctx(dev); 318132729Skan netvsc_device_info device_info; 319132729Skan hn_softc_t *sc; 32090075Sobrien int unit = device_get_unit(dev); 321132729Skan struct ifnet *ifp; 322132729Skan struct sysctl_oid_list *child; 323132729Skan struct sysctl_ctx_list *ctx; 32490075Sobrien int ret; 325169699Skan 326169699Skan sc = device_get_softc(dev); 327169699Skan if (sc == NULL) { 328132729Skan return (ENOMEM); 329132729Skan } 330132729Skan 331132729Skan bzero(sc, sizeof(hn_softc_t)); 33290075Sobrien sc->hn_unit = unit; 333132729Skan sc->hn_dev = dev; 334132729Skan sc->hn_lro_hiwat = HN_LRO_HIWAT_DEF; 335132729Skan sc->hn_trust_hosttcp = hn_trust_hosttcp; 336132729Skan 33790075Sobrien NV_LOCK_INIT(sc, "NetVSCLock"); 338132729Skan 33990075Sobrien sc->hn_dev_obj = device_ctx; 340132729Skan 34190075Sobrien ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER); 342132729Skan ifp->if_softc = sc; 343132729Skan 344132729Skan if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 345132729Skan ifp->if_dunit = unit; 346132729Skan ifp->if_dname = NETVSC_DEVNAME; 347132729Skan 348132729Skan ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 349132729Skan ifp->if_ioctl = hn_ioctl; 350132729Skan ifp->if_start = hn_start; 351132729Skan ifp->if_init = hn_ifinit; 352132729Skan /* needed by hv_rf_on_device_add() code */ 353132729Skan ifp->if_mtu = ETHERMTU; 354132729Skan IFQ_SET_MAXLEN(&ifp->if_snd, 512); 355132729Skan ifp->if_snd.ifq_drv_maxlen = 511; 356169699Skan IFQ_SET_READY(&ifp->if_snd); 357169699Skan 358169699Skan ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); 359169699Skan ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); 360132729Skan ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); 361132729Skan /* XXX ifmedia_set really should do this for us */ 362132729Skan sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; 363132729Skan 364132729Skan /* 365117406Skan * Tell upper layers that we support full VLAN capability. 366117406Skan */ 367132729Skan ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); 368117406Skan ifp->if_capabilities |= 369117406Skan IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | 370117406Skan IFCAP_LRO; 371117406Skan ifp->if_capenable |= 372132729Skan IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | 373117406Skan IFCAP_LRO; 374117406Skan /* 375117406Skan * Only enable UDP checksum offloading when it is on 2012R2 or 376117406Skan * later. UDP checksum offloading doesn't work on earlier 377117406Skan * Windows releases. 378117406Skan */ 379117406Skan if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1) 380117406Skan ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 381117406Skan else 382117406Skan ifp->if_hwassist = CSUM_TCP | CSUM_TSO; 383117406Skan 38490075Sobrien ret = hv_rf_on_device_add(device_ctx, &device_info); 38590075Sobrien if (ret != 0) { 38690075Sobrien if_free(ifp); 38790075Sobrien 38890075Sobrien return (ret); 38990075Sobrien } 39090075Sobrien if (device_info.link_state == 0) { 39190075Sobrien sc->hn_carrier = 1; 392117406Skan } 393117406Skan 39490075Sobrien#if defined(INET) || defined(INET6) 395117406Skan tcp_lro_init(&sc->hn_lro); 396117406Skan /* Driver private LRO settings */ 397117406Skan sc->hn_lro.ifp = ifp; 398117406Skan#ifdef HN_LRO_HIWAT 399117406Skan sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat; 400117406Skan#endif 401117406Skan#endif /* INET || INET6 */ 402117406Skan 403117406Skan ether_ifattach(ifp, device_info.mac_addr); 404117406Skan 405117406Skan ctx = device_get_sysctl_ctx(dev); 406117406Skan child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 407117406Skan 408169699Skan SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_queued", 409117406Skan CTLFLAG_RW, &sc->hn_lro.lro_queued, 0, "LRO queued"); 410169699Skan SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_flushed", 411117406Skan CTLFLAG_RW, &sc->hn_lro.lro_flushed, 0, "LRO flushed"); 412117406Skan SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "lro_tried", 413117406Skan CTLFLAG_RW, &sc->hn_lro_tried, "# of LRO tries"); 414117406Skan#ifdef HN_LRO_HIWAT 41590075Sobrien SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_hiwat", 41690075Sobrien CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_hiwat_sysctl, 41790075Sobrien "I", "LRO high watermark"); 418117406Skan#endif 41990075Sobrien SYSCTL_ADD_INT(ctx, child, OID_AUTO, "trust_hosttcp", 42090075Sobrien CTLFLAG_RW, &sc->hn_trust_hosttcp, 0, 421117406Skan "Trust tcp segement verification on host side, " 422117406Skan "when csum info is missing"); 42396557Sobrien SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_ip", 424117406Skan CTLFLAG_RW, &sc->hn_csum_ip, "RXCSUM IP"); 42590075Sobrien SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_tcp", 42690075Sobrien CTLFLAG_RW, &sc->hn_csum_tcp, "RXCSUM TCP"); 42790075Sobrien SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_trusted", 42890075Sobrien CTLFLAG_RW, &sc->hn_csum_trusted, 42990075Sobrien "# of TCP segements that we trust host's csum verification"); 43090075Sobrien SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "small_pkts", 43190075Sobrien CTLFLAG_RW, &sc->hn_small_pkts, "# of small packets received"); 43290075Sobrien 43390075Sobrien if (unit == 0) { 434132729Skan struct sysctl_ctx_list *dc_ctx; 43590075Sobrien struct sysctl_oid_list *dc_child; 43690075Sobrien devclass_t dc; 43790075Sobrien 438117406Skan /* 439117406Skan * Add sysctl nodes for devclass 440117406Skan */ 441117406Skan dc = device_get_devclass(dev); 442117406Skan dc_ctx = devclass_get_sysctl_ctx(dc); 443117406Skan dc_child = SYSCTL_CHILDREN(devclass_get_sysctl_tree(dc)); 444117406Skan 445117406Skan SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "trust_hosttcp", 446117406Skan CTLFLAG_RD, &hn_trust_hosttcp, 0, 447117406Skan "Trust tcp segement verification on host side, " 448117406Skan "when csum info is missing (global setting)"); 449117406Skan } 450117406Skan 451117406Skan return (0); 452117406Skan} 453117406Skan 454117406Skan/* 455117406Skan * Standard detach entry point 456117406Skan */ 457117406Skanstatic int 458117406Skannetvsc_detach(device_t dev) 459117406Skan{ 460117406Skan struct hn_softc *sc = device_get_softc(dev); 461117406Skan struct hv_device *hv_device = vmbus_get_devctx(dev); 462117406Skan 463117406Skan if (bootverbose) 464117406Skan printf("netvsc_detach\n"); 465117406Skan 466117406Skan /* 467117406Skan * XXXKYS: Need to clean up all our 468117406Skan * driver state; this is the driver 469117406Skan * unloading. 470117406Skan */ 471117406Skan 472117406Skan /* 473117406Skan * XXXKYS: Need to stop outgoing traffic and unregister 474117406Skan * the netdevice. 475117406Skan */ 476117406Skan 477117406Skan hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL); 478117406Skan 479117406Skan ifmedia_removeall(&sc->hn_media); 480117406Skan#if defined(INET) || defined(INET6) 481117406Skan tcp_lro_free(&sc->hn_lro); 482117406Skan#endif 483117406Skan 484117406Skan return (0); 485117406Skan} 486117406Skan 487117406Skan/* 488117406Skan * Standard shutdown entry point 489117406Skan */ 490117406Skanstatic int 491117406Skannetvsc_shutdown(device_t dev) 492117406Skan{ 493117406Skan return (0); 494117406Skan} 495117406Skan 496117406Skan/* 497117406Skan * Send completion processing 498117406Skan * 499117406Skan * Note: It looks like offset 0 of buf is reserved to hold the softc 500117406Skan * pointer. The sc pointer is not currently needed in this function, and 501117406Skan * it is not presently populated by the TX function. 502117406Skan */ 503117406Skanvoid 504117406Skannetvsc_xmit_completion(void *context) 505117406Skan{ 506117406Skan netvsc_packet *packet = (netvsc_packet *)context; 507117406Skan struct mbuf *mb; 508117406Skan uint8_t *buf; 509117406Skan 510117406Skan mb = (struct mbuf *)(uintptr_t)packet->compl.send.send_completion_tid; 511117406Skan buf = ((uint8_t *)packet) - HV_NV_PACKET_OFFSET_IN_BUF; 512117406Skan 513117406Skan free(buf, M_NETVSC); 514117406Skan 515117406Skan if (mb != NULL) { 516117406Skan m_freem(mb); 517117406Skan } 518117406Skan} 519117406Skan 520117406Skan/* 521117406Skan * Start a transmit of one or more packets 522117406Skan */ 523117406Skanstatic int 524117406Skanhn_start_locked(struct ifnet *ifp) 525117406Skan{ 526117406Skan hn_softc_t *sc = ifp->if_softc; 527117406Skan struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); 528117406Skan netvsc_dev *net_dev = sc->net_dev; 529117406Skan device_t dev = device_ctx->device; 530117406Skan uint8_t *buf; 531117406Skan netvsc_packet *packet; 532117406Skan struct mbuf *m_head, *m; 533117406Skan struct mbuf *mc_head = NULL; 534117406Skan struct ether_vlan_header *eh; 535117406Skan rndis_msg *rndis_mesg; 536117406Skan rndis_packet *rndis_pkt; 537117406Skan rndis_per_packet_info *rppi; 538117406Skan ndis_8021q_info *rppi_vlan_info; 539132729Skan rndis_tcp_ip_csum_info *csum_info; 540132729Skan rndis_tcp_tso_info *tso_info; 541132729Skan int ether_len; 542132729Skan int i; 543132729Skan int num_frags; 544132729Skan int len; 545132729Skan int retries = 0; 546117406Skan int ret = 0; 547117406Skan uint32_t rndis_msg_size = 0; 548117406Skan uint32_t trans_proto_type; 549117406Skan uint32_t send_buf_section_idx = 550117406Skan NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; 551169699Skan 552169699Skan while (!IFQ_DRV_IS_EMPTY(&sc->hn_ifp->if_snd)) { 553169699Skan IFQ_DRV_DEQUEUE(&sc->hn_ifp->if_snd, m_head); 554169699Skan if (m_head == NULL) { 555169699Skan break; 556117406Skan } 557117406Skan 558117406Skan len = 0; 559117406Skan num_frags = 0; 560117406Skan 561117406Skan /* Walk the mbuf list computing total length and num frags */ 562117406Skan for (m = m_head; m != NULL; m = m->m_next) { 563117406Skan if (m->m_len != 0) { 564117406Skan num_frags++; 565117406Skan len += m->m_len; 566117406Skan } 567117406Skan } 568169699Skan 569169699Skan /* 570117406Skan * Reserve the number of pages requested. Currently, 571169699Skan * one page is reserved for the message in the RNDIS 572117406Skan * filter packet 573117406Skan */ 574117406Skan num_frags += HV_RF_NUM_TX_RESERVED_PAGE_BUFS; 575117406Skan 576117406Skan /* If exceeds # page_buffers in netvsc_packet */ 577169699Skan if (num_frags > NETVSC_PACKET_MAXPAGE) { 578169699Skan device_printf(dev, "exceed max page buffers,%d,%d\n", 579169699Skan num_frags, NETVSC_PACKET_MAXPAGE); 580117406Skan m_freem(m_head); 581169699Skan if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 582117406Skan return (EINVAL); 583117406Skan } 584117406Skan 585117406Skan /* 586117406Skan * Allocate a buffer with space for a netvsc packet plus a 587117406Skan * number of reserved areas. First comes a (currently 16 588117406Skan * bytes, currently unused) reserved data area. Second is 589103452Skan * the netvsc_packet. Third is an area reserved for an 590103452Skan * rndis_filter_packet struct. Fourth (optional) is a 591103452Skan * rndis_per_packet_info struct. 592103452Skan * Changed malloc to M_NOWAIT to avoid sleep under spin lock. 593103452Skan * No longer reserving extra space for page buffers, as they 59490075Sobrien * are already part of the netvsc_packet. 59590075Sobrien */ 59690075Sobrien buf = malloc(HV_NV_PACKET_OFFSET_IN_BUF + 59790075Sobrien sizeof(netvsc_packet) + 59890075Sobrien sizeof(rndis_msg) + 59990075Sobrien RNDIS_VLAN_PPI_SIZE + 60090075Sobrien RNDIS_TSO_PPI_SIZE + 60190075Sobrien RNDIS_CSUM_PPI_SIZE, 60290075Sobrien M_NETVSC, M_ZERO | M_NOWAIT); 60390075Sobrien if (buf == NULL) { 60490075Sobrien device_printf(dev, "hn:malloc packet failed\n"); 60590075Sobrien m_freem(m_head); 60690075Sobrien if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 60790075Sobrien return (ENOMEM); 60890075Sobrien } 60990075Sobrien 610117406Skan packet = (netvsc_packet *)(buf + HV_NV_PACKET_OFFSET_IN_BUF); 611117406Skan *(vm_offset_t *)buf = HV_NV_SC_PTR_OFFSET_IN_BUF; 612117406Skan 613117406Skan packet->is_data_pkt = TRUE; 61490075Sobrien 61590075Sobrien /* Set up the rndis header */ 61690075Sobrien packet->page_buf_count = num_frags; 61790075Sobrien 61890075Sobrien /* Initialize it from the mbuf */ 619132729Skan packet->tot_data_buf_len = len; 62090075Sobrien 621132729Skan /* 622132729Skan * extension points to the area reserved for the 623132729Skan * rndis_filter_packet, which is placed just after 624132729Skan * the netvsc_packet (and rppi struct, if present; 625132729Skan * length is updated later). 626132729Skan */ 627132729Skan packet->rndis_mesg = packet + 1; 62890075Sobrien rndis_mesg = (rndis_msg *)packet->rndis_mesg; 629169699Skan rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG; 630132729Skan 631132729Skan rndis_pkt = &rndis_mesg->msg.packet; 632132729Skan rndis_pkt->data_offset = sizeof(rndis_packet); 633132729Skan rndis_pkt->data_length = packet->tot_data_buf_len; 634132729Skan rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet); 635169699Skan 636132729Skan rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet); 637132729Skan 638132729Skan /* 639132729Skan * If the Hyper-V infrastructure needs to embed a VLAN tag, 640132729Skan * initialize netvsc_packet and rppi struct values as needed. 641132729Skan */ 642132729Skan if (m_head->m_flags & M_VLANTAG) { 643132729Skan /* 644132729Skan * set up some additional fields so the Hyper-V infrastructure will stuff the VLAN tag 645132729Skan * into the frame. 646169699Skan */ 647132729Skan packet->vlan_tci = m_head->m_pkthdr.ether_vtag; 648132729Skan 649169699Skan rndis_msg_size += RNDIS_VLAN_PPI_SIZE; 650132729Skan 65190075Sobrien rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE, 65290075Sobrien ieee_8021q_info); 653132729Skan 654132729Skan /* VLAN info immediately follows rppi struct */ 655117406Skan rppi_vlan_info = (ndis_8021q_info *)((char*)rppi + 656132729Skan rppi->per_packet_info_offset); 657169699Skan /* FreeBSD does not support CFI or priority */ 658169699Skan rppi_vlan_info->u1.s1.vlan_id = 659132729Skan packet->vlan_tci & 0xfff; 660132729Skan } 661169699Skan 662169699Skan /* Only check the flags for outbound and ignore the ones for inbound */ 66390075Sobrien if (0 == (m_head->m_pkthdr.csum_flags & HV_CSUM_FOR_OUTBOUND)) { 664169699Skan goto pre_send; 665169699Skan } 66690075Sobrien 66790075Sobrien eh = mtod(m_head, struct ether_vlan_header*); 66890075Sobrien if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 66990075Sobrien ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 670132729Skan } else { 67190075Sobrien ether_len = ETHER_HDR_LEN; 672132729Skan } 673132729Skan 67490075Sobrien trans_proto_type = get_transport_proto_type(m_head); 67590075Sobrien if (TRANSPORT_TYPE_NOT_IP == trans_proto_type) { 676132729Skan goto pre_send; 67790075Sobrien } 67890075Sobrien 67990075Sobrien /* 680132729Skan * TSO packet needless to setup the send side checksum 68190075Sobrien * offload. 682169699Skan */ 683169699Skan if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 684132729Skan goto do_tso; 68590075Sobrien } 686169699Skan 687169699Skan /* setup checksum offload */ 688132729Skan rndis_msg_size += RNDIS_CSUM_PPI_SIZE; 68990075Sobrien rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE, 690132729Skan tcpip_chksum_info); 691132729Skan csum_info = (rndis_tcp_ip_csum_info *)((char*)rppi + 692132729Skan rppi->per_packet_info_offset); 693132729Skan 694132729Skan if (trans_proto_type & (TYPE_IPV4 << 16)) { 695132729Skan csum_info->xmit.is_ipv4 = 1; 696132729Skan } else { 697132729Skan csum_info->xmit.is_ipv6 = 1; 698132729Skan } 699132729Skan 700132729Skan if (trans_proto_type & TYPE_TCP) { 701169699Skan csum_info->xmit.tcp_csum = 1; 70290075Sobrien csum_info->xmit.tcp_header_offset = 0; 703132729Skan } else if (trans_proto_type & TYPE_UDP) { 704132729Skan csum_info->xmit.udp_csum = 1; 705132729Skan } 706132729Skan 707132729Skan goto pre_send; 708169699Skan 70990075Sobriendo_tso: 71090075Sobrien /* setup TCP segmentation offload */ 711169699Skan rndis_msg_size += RNDIS_TSO_PPI_SIZE; 712169699Skan rppi = hv_set_rppi_data(rndis_mesg, RNDIS_TSO_PPI_SIZE, 713169699Skan tcp_large_send_info); 714169699Skan 71590075Sobrien tso_info = (rndis_tcp_tso_info *)((char *)rppi + 716169699Skan rppi->per_packet_info_offset); 717169699Skan tso_info->lso_v2_xmit.type = 718169699Skan RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; 71990075Sobrien 72090075Sobrien#ifdef INET 72190075Sobrien if (trans_proto_type & (TYPE_IPV4 << 16)) { 72290075Sobrien struct ip *ip = 72390075Sobrien (struct ip *)(m_head->m_data + ether_len); 724169699Skan unsigned long iph_len = ip->ip_hl << 2; 72590075Sobrien struct tcphdr *th = 72690075Sobrien (struct tcphdr *)((caddr_t)ip + iph_len); 72790075Sobrien 72890075Sobrien tso_info->lso_v2_xmit.ip_version = 72990075Sobrien RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; 73090075Sobrien ip->ip_len = 0; 73190075Sobrien ip->ip_sum = 0; 73290075Sobrien 73390075Sobrien th->th_sum = in_pseudo(ip->ip_src.s_addr, 73490075Sobrien ip->ip_dst.s_addr, 73590075Sobrien htons(IPPROTO_TCP)); 736132729Skan } 737132729Skan#endif 73890075Sobrien#if defined(INET6) && defined(INET) 73990075Sobrien else 74090075Sobrien#endif 74190075Sobrien#ifdef INET6 74290075Sobrien { 74390075Sobrien struct ip6_hdr *ip6 = 74490075Sobrien (struct ip6_hdr *)(m_head->m_data + ether_len); 74590075Sobrien struct tcphdr *th = (struct tcphdr *)(ip6 + 1); 74690075Sobrien 74790075Sobrien tso_info->lso_v2_xmit.ip_version = 74890075Sobrien RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; 74990075Sobrien ip6->ip6_plen = 0; 750132729Skan th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 751132729Skan } 752132729Skan#endif 753132729Skan tso_info->lso_v2_xmit.tcp_header_offset = 0; 75490075Sobrien tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz; 75590075Sobrien 75690075Sobrienpre_send: 75790075Sobrien rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size; 758132729Skan packet->tot_data_buf_len = rndis_mesg->msg_len; 759132729Skan 760169699Skan /* send packet with send buffer */ 761169699Skan if (packet->tot_data_buf_len < net_dev->send_section_size) { 762169699Skan send_buf_section_idx = 763169699Skan hv_nv_get_next_send_section(net_dev); 764169699Skan if (send_buf_section_idx != 765132729Skan NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { 76690075Sobrien char *dest = ((char *)net_dev->send_buf + 76790075Sobrien send_buf_section_idx * 76890075Sobrien net_dev->send_section_size); 76990075Sobrien 770132729Skan memcpy(dest, rndis_mesg, rndis_msg_size); 77190075Sobrien dest += rndis_msg_size; 772132729Skan for (m = m_head; m != NULL; m = m->m_next) { 77390075Sobrien if (m->m_len) { 77490075Sobrien memcpy(dest, 775132729Skan (void *)mtod(m, vm_offset_t), 77690075Sobrien m->m_len); 777132729Skan dest += m->m_len; 77890075Sobrien } 779169699Skan } 78090075Sobrien 781169699Skan packet->send_buf_section_idx = 782169699Skan send_buf_section_idx; 783132729Skan packet->send_buf_section_size = 78490075Sobrien packet->tot_data_buf_len; 785169699Skan packet->page_buf_count = 0; 786169699Skan goto do_send; 787132729Skan } 78890075Sobrien } 78990075Sobrien 79090075Sobrien /* send packet with page buffer */ 791132729Skan packet->page_buffers[0].pfn = 792169699Skan atop(hv_get_phys_addr(rndis_mesg)); 79390075Sobrien packet->page_buffers[0].offset = 794161660Skan (unsigned long)rndis_mesg & PAGE_MASK; 795161660Skan packet->page_buffers[0].length = rndis_msg_size; 796132729Skan 79790075Sobrien /* 798169699Skan * Fill the page buffers with mbuf info starting at index 79990075Sobrien * HV_RF_NUM_TX_RESERVED_PAGE_BUFS. 800132729Skan */ 801117406Skan i = HV_RF_NUM_TX_RESERVED_PAGE_BUFS; 802132729Skan for (m = m_head; m != NULL; m = m->m_next) { 803117406Skan if (m->m_len) { 804132729Skan vm_offset_t paddr = 805132729Skan vtophys(mtod(m, vm_offset_t)); 806169699Skan packet->page_buffers[i].pfn = 807132729Skan paddr >> PAGE_SHIFT; 80890075Sobrien packet->page_buffers[i].offset = 80990075Sobrien paddr & (PAGE_SIZE - 1); 81090075Sobrien packet->page_buffers[i].length = m->m_len; 81190075Sobrien i++; 81290075Sobrien } 813169699Skan } 814169699Skan 815169699Skan packet->send_buf_section_idx = 816169699Skan NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; 817169699Skan packet->send_buf_section_size = 0; 818169699Skan 819169699Skando_send: 820169699Skan 82190075Sobrien /* 82290075Sobrien * If bpf, copy the mbuf chain. This is less expensive than 82390075Sobrien * it appears; the mbuf clusters are not copied, only their 824132729Skan * reference counts are incremented. 825132729Skan * Needed to avoid a race condition where the completion 82690075Sobrien * callback is invoked, freeing the mbuf chain, before the 827132729Skan * bpf_mtap code has a chance to run. 828117406Skan */ 829169699Skan if (ifp->if_bpf) { 830169699Skan mc_head = m_copypacket(m_head, M_DONTWAIT); 831169699Skan } 832169699Skanretry_send: 833169699Skan /* Set the completion routine */ 834169699Skan packet->compl.send.on_send_completion = netvsc_xmit_completion; 835169699Skan packet->compl.send.send_completion_context = packet; 836169699Skan packet->compl.send.send_completion_tid = (uint64_t)(uintptr_t)m_head; 837169699Skan 838169699Skan /* Removed critical_enter(), does not appear necessary */ 839169699Skan ret = hv_nv_on_send(device_ctx, packet); 840169699Skan if (ret == 0) { 841169699Skan ifp->if_opackets++; 842169699Skan /* if bpf && mc_head, call bpf_mtap code */ 843169699Skan if (mc_head) { 844169699Skan ETHER_BPF_MTAP(ifp, mc_head); 845169699Skan } 846169699Skan } else { 847169699Skan retries++; 848169699Skan if (retries < 4) { 849169699Skan goto retry_send; 850169699Skan } 851169699Skan 852169699Skan IF_PREPEND(&ifp->if_snd, m_head); 853169699Skan ifp->if_drv_flags |= IFF_DRV_OACTIVE; 854169699Skan 855169699Skan /* 856169699Skan * Null the mbuf pointer so the completion function 857169699Skan * does not free the mbuf chain. We just pushed the 858169699Skan * mbuf chain back on the if_snd queue. 859169699Skan */ 860132729Skan packet->compl.send.send_completion_tid = 0; 861132729Skan 862132729Skan /* 863132729Skan * Release the resources since we will not get any 864132729Skan * send completion 865132729Skan */ 866169699Skan netvsc_xmit_completion(packet); 867169699Skan if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 868169699Skan } 869169699Skan 870169699Skan /* if bpf && mc_head, free the mbuf chain copy */ 871169699Skan if (mc_head) { 872132729Skan m_freem(mc_head); 873132729Skan } 874132729Skan } 875132729Skan 876132729Skan return (ret); 877169699Skan} 878169699Skan 879169699Skan/* 880169699Skan * Link up/down notification 881169699Skan */ 882132729Skanvoid 883132729Skannetvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status) 884169699Skan{ 885169699Skan hn_softc_t *sc = device_get_softc(device_obj->device); 886169699Skan 887169699Skan if (sc == NULL) { 888169699Skan return; 889132729Skan } 890169699Skan 891169699Skan if (status == 1) { 892169699Skan sc->hn_carrier = 1; 893169699Skan } else { 894169699Skan sc->hn_carrier = 0; 895169699Skan } 896169699Skan} 897169699Skan 898169699Skan/* 899169699Skan * Append the specified data to the indicated mbuf chain, 900169699Skan * Extend the mbuf chain if the new data does not fit in 901169699Skan * existing space. 902169699Skan * 903169699Skan * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. 904169699Skan * There should be an equivalent in the kernel mbuf code, 905169699Skan * but there does not appear to be one yet. 906169699Skan * 907169699Skan * Differs from m_append() in that additional mbufs are 908169699Skan * allocated with cluster size MJUMPAGESIZE, and filled 909169699Skan * accordingly. 910169699Skan * 911169699Skan * Return 1 if able to complete the job; otherwise 0. 912169699Skan */ 913169699Skanstatic int 914169699Skanhv_m_append(struct mbuf *m0, int len, c_caddr_t cp) 915169699Skan{ 916169699Skan struct mbuf *m, *n; 917169699Skan int remainder, space; 918169699Skan 919169699Skan for (m = m0; m->m_next != NULL; m = m->m_next) 920169699Skan ; 921169699Skan remainder = len; 922169699Skan space = M_TRAILINGSPACE(m); 923169699Skan if (space > 0) { 924169699Skan /* 925169699Skan * Copy into available space. 926169699Skan */ 927169699Skan if (space > remainder) 928169699Skan space = remainder; 929169699Skan bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 930169699Skan m->m_len += space; 931169699Skan cp += space; 932169699Skan remainder -= space; 933169699Skan } 934169699Skan while (remainder > 0) { 935169699Skan /* 936169699Skan * Allocate a new mbuf; could check space 937132729Skan * and allocate a cluster instead. 938132729Skan */ 939132729Skan n = m_getjcl(M_DONTWAIT, m->m_type, 0, MJUMPAGESIZE); 940132729Skan if (n == NULL) 941132729Skan break; 942132729Skan n->m_len = min(MJUMPAGESIZE, remainder); 943132729Skan bcopy(cp, mtod(n, caddr_t), n->m_len); 944169699Skan cp += n->m_len; 945132729Skan remainder -= n->m_len; 946169699Skan m->m_next = n; 947169699Skan m = n; 948169699Skan } 949169699Skan if (m0->m_flags & M_PKTHDR) 950169699Skan m0->m_pkthdr.len += len - remainder; 951169699Skan 952169699Skan return (remainder == 0); 953169699Skan} 954169699Skan 955169699Skan 956169699Skan/* 957169699Skan * Called when we receive a data packet from the "wire" on the 958169699Skan * specified device 959169699Skan * 960169699Skan * Note: This is no longer used as a callback 961169699Skan */ 962169699Skanint 963169699Skannetvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, 964169699Skan rndis_tcp_ip_csum_info *csum_info) 965169699Skan{ 966169699Skan hn_softc_t *sc = (hn_softc_t *)device_get_softc(device_ctx->device); 967169699Skan struct mbuf *m_new; 968169699Skan struct ifnet *ifp; 969169699Skan device_t dev = device_ctx->device; 97090075Sobrien int size, do_lro = 0; 971 972 if (sc == NULL) { 973 return (0); /* TODO: KYS how can this be! */ 974 } 975 976 ifp = sc->hn_ifp; 977 978 ifp = sc->arpcom.ac_ifp; 979 980 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 981 return (0); 982 } 983 984 /* 985 * Bail out if packet contains more data than configured MTU. 986 */ 987 if (packet->tot_data_buf_len > (ifp->if_mtu + ETHER_HDR_LEN)) { 988 return (0); 989 } else if (packet->tot_data_buf_len <= MHLEN) { 990 m_new = m_gethdr(M_NOWAIT, MT_DATA); 991 if (m_new == NULL) 992 return (0); 993 memcpy(mtod(m_new, void *), packet->data, 994 packet->tot_data_buf_len); 995 m_new->m_pkthdr.len = m_new->m_len = packet->tot_data_buf_len; 996 sc->hn_small_pkts++; 997 } else { 998 /* 999 * Get an mbuf with a cluster. For packets 2K or less, 1000 * get a standard 2K cluster. For anything larger, get a 1001 * 4K cluster. Any buffers larger than 4K can cause problems 1002 * if looped around to the Hyper-V TX channel, so avoid them. 1003 */ 1004 size = MCLBYTES; 1005 if (packet->tot_data_buf_len > MCLBYTES) { 1006 /* 4096 */ 1007 size = MJUMPAGESIZE; 1008 } 1009 1010 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); 1011 if (m_new == NULL) { 1012 device_printf(dev, "alloc mbuf failed.\n"); 1013 return (0); 1014 } 1015 1016 hv_m_append(m_new, packet->tot_data_buf_len, packet->data); 1017 } 1018 m_new->m_pkthdr.rcvif = ifp; 1019 1020 /* receive side checksum offload */ 1021 if (NULL != csum_info) { 1022 /* IP csum offload */ 1023 if (csum_info->receive.ip_csum_succeeded) { 1024 m_new->m_pkthdr.csum_flags |= 1025 (CSUM_IP_CHECKED | CSUM_IP_VALID); 1026 sc->hn_csum_ip++; 1027 } 1028 1029 /* TCP csum offload */ 1030 if (csum_info->receive.tcp_csum_succeeded) { 1031 m_new->m_pkthdr.csum_flags |= 1032 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1033 m_new->m_pkthdr.csum_data = 0xffff; 1034 sc->hn_csum_tcp++; 1035 } 1036 1037 if (csum_info->receive.ip_csum_succeeded && 1038 csum_info->receive.tcp_csum_succeeded) 1039 do_lro = 1; 1040 } else { 1041 const struct ether_header *eh; 1042 uint16_t etype; 1043 int hoff; 1044 1045 hoff = sizeof(*eh); 1046 if (m_new->m_len < hoff) 1047 goto skip; 1048 eh = mtod(m_new, struct ether_header *); 1049 etype = ntohs(eh->ether_type); 1050 if (etype == ETHERTYPE_VLAN) { 1051 const struct ether_vlan_header *evl; 1052 1053 hoff = sizeof(*evl); 1054 if (m_new->m_len < hoff) 1055 goto skip; 1056 evl = mtod(m_new, struct ether_vlan_header *); 1057 etype = ntohs(evl->evl_proto); 1058 } 1059 1060 if (etype == ETHERTYPE_IP) { 1061 int pr; 1062 1063 pr = hn_check_iplen(m_new, hoff); 1064 if (pr == IPPROTO_TCP) { 1065 if (sc->hn_trust_hosttcp) { 1066 sc->hn_csum_trusted++; 1067 m_new->m_pkthdr.csum_flags |= 1068 (CSUM_IP_CHECKED | CSUM_IP_VALID | 1069 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1070 m_new->m_pkthdr.csum_data = 0xffff; 1071 } 1072 /* Rely on SW csum verification though... */ 1073 do_lro = 1; 1074 } 1075 } 1076 } 1077skip: 1078 if ((packet->vlan_tci != 0) && 1079 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { 1080 m_new->m_pkthdr.ether_vtag = packet->vlan_tci; 1081 m_new->m_flags |= M_VLANTAG; 1082 } 1083 1084 /* 1085 * Note: Moved RX completion back to hv_nv_on_receive() so all 1086 * messages (not just data messages) will trigger a response. 1087 */ 1088 1089 ifp->if_ipackets++; 1090 1091 if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { 1092#if defined(INET) || defined(INET6) 1093 struct lro_ctrl *lro = &sc->hn_lro; 1094 1095 if (lro->lro_cnt) { 1096 sc->hn_lro_tried++; 1097 if (tcp_lro_rx(lro, m_new, 0) == 0) { 1098 /* DONE! */ 1099 return 0; 1100 } 1101 } 1102#endif 1103 } 1104 1105 /* We're not holding the lock here, so don't release it */ 1106 (*ifp->if_input)(ifp, m_new); 1107 1108 return (0); 1109} 1110 1111void 1112netvsc_recv_rollup(struct hv_device *device_ctx) 1113{ 1114#if defined(INET) || defined(INET6) 1115 hn_softc_t *sc = device_get_softc(device_ctx->device); 1116 struct lro_ctrl *lro = &sc->hn_lro; 1117 struct lro_entry *queued; 1118 1119 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { 1120 SLIST_REMOVE_HEAD(&lro->lro_active, next); 1121 tcp_lro_flush(lro, queued); 1122 } 1123#endif 1124} 1125 1126/* 1127 * Rules for using sc->temp_unusable: 1128 * 1. sc->temp_unusable can only be read or written while holding NV_LOCK() 1129 * 2. code reading sc->temp_unusable under NV_LOCK(), and finding 1130 * sc->temp_unusable set, must release NV_LOCK() and exit 1131 * 3. to retain exclusive control of the interface, 1132 * sc->temp_unusable must be set by code before releasing NV_LOCK() 1133 * 4. only code setting sc->temp_unusable can clear sc->temp_unusable 1134 * 5. code setting sc->temp_unusable must eventually clear sc->temp_unusable 1135 */ 1136 1137/* 1138 * Standard ioctl entry point. Called when the user wants to configure 1139 * the interface. 1140 */ 1141static int 1142hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1143{ 1144 hn_softc_t *sc = ifp->if_softc; 1145 struct ifreq *ifr = (struct ifreq *)data; 1146#ifdef INET 1147 struct ifaddr *ifa = (struct ifaddr *)data; 1148#endif 1149 netvsc_device_info device_info; 1150 struct hv_device *hn_dev; 1151 int mask, error = 0; 1152 int retry_cnt = 500; 1153 1154 switch(cmd) { 1155 1156 case SIOCSIFADDR: 1157#ifdef INET 1158 if (ifa->ifa_addr->sa_family == AF_INET) { 1159 ifp->if_flags |= IFF_UP; 1160 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 1161 hn_ifinit(sc); 1162 arp_ifinit(ifp, ifa); 1163 } else 1164#endif 1165 error = ether_ioctl(ifp, cmd, data); 1166 break; 1167 case SIOCSIFMTU: 1168 hn_dev = vmbus_get_devctx(sc->hn_dev); 1169 1170 /* Check MTU value change */ 1171 if (ifp->if_mtu == ifr->ifr_mtu) 1172 break; 1173 1174 if (ifr->ifr_mtu > NETVSC_MAX_CONFIGURABLE_MTU) { 1175 error = EINVAL; 1176 break; 1177 } 1178 1179 /* Obtain and record requested MTU */ 1180 ifp->if_mtu = ifr->ifr_mtu; 1181 /* 1182 * Make sure that LRO high watermark is still valid, 1183 * after MTU change (the 2*MTU limit). 1184 */ 1185 if (!HN_LRO_HIWAT_ISVALID(sc, sc->hn_lro_hiwat)) 1186 hn_set_lro_hiwat(sc, HN_LRO_HIWAT_MTULIM(ifp)); 1187 1188 do { 1189 NV_LOCK(sc); 1190 if (!sc->temp_unusable) { 1191 sc->temp_unusable = TRUE; 1192 retry_cnt = -1; 1193 } 1194 NV_UNLOCK(sc); 1195 if (retry_cnt > 0) { 1196 retry_cnt--; 1197 DELAY(5 * 1000); 1198 } 1199 } while (retry_cnt > 0); 1200 1201 if (retry_cnt == 0) { 1202 error = EINVAL; 1203 break; 1204 } 1205 1206 /* We must remove and add back the device to cause the new 1207 * MTU to take effect. This includes tearing down, but not 1208 * deleting the channel, then bringing it back up. 1209 */ 1210 error = hv_rf_on_device_remove(hn_dev, HV_RF_NV_RETAIN_CHANNEL); 1211 if (error) { 1212 NV_LOCK(sc); 1213 sc->temp_unusable = FALSE; 1214 NV_UNLOCK(sc); 1215 break; 1216 } 1217 error = hv_rf_on_device_add(hn_dev, &device_info); 1218 if (error) { 1219 NV_LOCK(sc); 1220 sc->temp_unusable = FALSE; 1221 NV_UNLOCK(sc); 1222 break; 1223 } 1224 1225 hn_ifinit_locked(sc); 1226 1227 NV_LOCK(sc); 1228 sc->temp_unusable = FALSE; 1229 NV_UNLOCK(sc); 1230 break; 1231 case SIOCSIFFLAGS: 1232 do { 1233 NV_LOCK(sc); 1234 if (!sc->temp_unusable) { 1235 sc->temp_unusable = TRUE; 1236 retry_cnt = -1; 1237 } 1238 NV_UNLOCK(sc); 1239 if (retry_cnt > 0) { 1240 retry_cnt--; 1241 DELAY(5 * 1000); 1242 } 1243 } while (retry_cnt > 0); 1244 1245 if (retry_cnt == 0) { 1246 error = EINVAL; 1247 break; 1248 } 1249 1250 if (ifp->if_flags & IFF_UP) { 1251 /* 1252 * If only the state of the PROMISC flag changed, 1253 * then just use the 'set promisc mode' command 1254 * instead of reinitializing the entire NIC. Doing 1255 * a full re-init means reloading the firmware and 1256 * waiting for it to start up, which may take a 1257 * second or two. 1258 */ 1259#ifdef notyet 1260 /* Fixme: Promiscuous mode? */ 1261 if (ifp->if_drv_flags & IFF_DRV_RUNNING && 1262 ifp->if_flags & IFF_PROMISC && 1263 !(sc->hn_if_flags & IFF_PROMISC)) { 1264 /* do something here for Hyper-V */ 1265 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && 1266 !(ifp->if_flags & IFF_PROMISC) && 1267 sc->hn_if_flags & IFF_PROMISC) { 1268 /* do something here for Hyper-V */ 1269 } else 1270#endif 1271 hn_ifinit_locked(sc); 1272 } else { 1273 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1274 hn_stop(sc); 1275 } 1276 } 1277 NV_LOCK(sc); 1278 sc->temp_unusable = FALSE; 1279 NV_UNLOCK(sc); 1280 sc->hn_if_flags = ifp->if_flags; 1281 error = 0; 1282 break; 1283 case SIOCSIFCAP: 1284 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 1285 if (mask & IFCAP_TXCSUM) { 1286 if (IFCAP_TXCSUM & ifp->if_capenable) { 1287 ifp->if_capenable &= ~IFCAP_TXCSUM; 1288 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 1289 } else { 1290 ifp->if_capenable |= IFCAP_TXCSUM; 1291 /* 1292 * Only enable UDP checksum offloading on 1293 * Windows Server 2012R2 or later releases. 1294 */ 1295 if (hv_vmbus_protocal_version >= 1296 HV_VMBUS_VERSION_WIN8_1) { 1297 ifp->if_hwassist |= 1298 (CSUM_TCP | CSUM_UDP); 1299 } else { 1300 ifp->if_hwassist |= CSUM_TCP; 1301 } 1302 } 1303 } 1304 1305 if (mask & IFCAP_RXCSUM) { 1306 if (IFCAP_RXCSUM & ifp->if_capenable) { 1307 ifp->if_capenable &= ~IFCAP_RXCSUM; 1308 } else { 1309 ifp->if_capenable |= IFCAP_RXCSUM; 1310 } 1311 } 1312 if (mask & IFCAP_LRO) 1313 ifp->if_capenable ^= IFCAP_LRO; 1314 1315 if (mask & IFCAP_TSO4) { 1316 ifp->if_capenable ^= IFCAP_TSO4; 1317 ifp->if_hwassist ^= CSUM_IP_TSO; 1318 } 1319 1320 if (mask & IFCAP_TSO6) { 1321 ifp->if_capenable ^= IFCAP_TSO6; 1322 ifp->if_hwassist ^= CSUM_IP6_TSO; 1323 } 1324 1325 error = 0; 1326 break; 1327 case SIOCADDMULTI: 1328 case SIOCDELMULTI: 1329#ifdef notyet 1330 /* Fixme: Multicast mode? */ 1331 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1332 NV_LOCK(sc); 1333 netvsc_setmulti(sc); 1334 NV_UNLOCK(sc); 1335 error = 0; 1336 } 1337#endif 1338 error = EINVAL; 1339 break; 1340 case SIOCSIFMEDIA: 1341 case SIOCGIFMEDIA: 1342 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); 1343 break; 1344 default: 1345 error = ether_ioctl(ifp, cmd, data); 1346 break; 1347 } 1348 1349 return (error); 1350} 1351 1352/* 1353 * 1354 */ 1355static void 1356hn_stop(hn_softc_t *sc) 1357{ 1358 struct ifnet *ifp; 1359 int ret; 1360 struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); 1361 1362 ifp = sc->hn_ifp; 1363 1364 if (bootverbose) 1365 printf(" Closing Device ...\n"); 1366 1367 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 1368 if_link_state_change(ifp, LINK_STATE_DOWN); 1369 sc->hn_initdone = 0; 1370 1371 ret = hv_rf_on_close(device_ctx); 1372} 1373 1374/* 1375 * FreeBSD transmit entry point 1376 */ 1377static void 1378hn_start(struct ifnet *ifp) 1379{ 1380 hn_softc_t *sc; 1381 1382 sc = ifp->if_softc; 1383 NV_LOCK(sc); 1384 if (sc->temp_unusable) { 1385 NV_UNLOCK(sc); 1386 return; 1387 } 1388 hn_start_locked(ifp); 1389 NV_UNLOCK(sc); 1390} 1391 1392/* 1393 * 1394 */ 1395static void 1396hn_ifinit_locked(hn_softc_t *sc) 1397{ 1398 struct ifnet *ifp; 1399 struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); 1400 int ret; 1401 1402 ifp = sc->hn_ifp; 1403 1404 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1405 return; 1406 } 1407 1408 hv_promisc_mode = 1; 1409 1410 ret = hv_rf_on_open(device_ctx); 1411 if (ret != 0) { 1412 return; 1413 } else { 1414 sc->hn_initdone = 1; 1415 } 1416 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1417 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1418 if_link_state_change(ifp, LINK_STATE_UP); 1419} 1420 1421/* 1422 * 1423 */ 1424static void 1425hn_ifinit(void *xsc) 1426{ 1427 hn_softc_t *sc = xsc; 1428 1429 NV_LOCK(sc); 1430 if (sc->temp_unusable) { 1431 NV_UNLOCK(sc); 1432 return; 1433 } 1434 sc->temp_unusable = TRUE; 1435 NV_UNLOCK(sc); 1436 1437 hn_ifinit_locked(sc); 1438 1439 NV_LOCK(sc); 1440 sc->temp_unusable = FALSE; 1441 NV_UNLOCK(sc); 1442} 1443 1444#ifdef LATER 1445/* 1446 * 1447 */ 1448static void 1449hn_watchdog(struct ifnet *ifp) 1450{ 1451 hn_softc_t *sc; 1452 sc = ifp->if_softc; 1453 1454 printf("hn%d: watchdog timeout -- resetting\n", sc->hn_unit); 1455 hn_ifinit(sc); /*???*/ 1456 ifp->if_oerrors++; 1457} 1458#endif 1459 1460#ifdef HN_LRO_HIWAT 1461static int 1462hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS) 1463{ 1464 struct hn_softc *sc = arg1; 1465 int hiwat, error; 1466 1467 hiwat = sc->hn_lro_hiwat; 1468 error = sysctl_handle_int(oidp, &hiwat, 0, req); 1469 if (error || req->newptr == NULL) 1470 return error; 1471 1472 if (!HN_LRO_HIWAT_ISVALID(sc, hiwat)) 1473 return EINVAL; 1474 1475 if (sc->hn_lro_hiwat != hiwat) 1476 hn_set_lro_hiwat(sc, hiwat); 1477 return 0; 1478} 1479#endif /* HN_LRO_HIWAT */ 1480 1481static int 1482hn_check_iplen(const struct mbuf *m, int hoff) 1483{ 1484 const struct ip *ip; 1485 int len, iphlen, iplen; 1486 const struct tcphdr *th; 1487 int thoff; /* TCP data offset */ 1488 1489 len = hoff + sizeof(struct ip); 1490 1491 /* The packet must be at least the size of an IP header. */ 1492 if (m->m_pkthdr.len < len) 1493 return IPPROTO_DONE; 1494 1495 /* The fixed IP header must reside completely in the first mbuf. */ 1496 if (m->m_len < len) 1497 return IPPROTO_DONE; 1498 1499 ip = mtodo(m, hoff); 1500 1501 /* Bound check the packet's stated IP header length. */ 1502 iphlen = ip->ip_hl << 2; 1503 if (iphlen < sizeof(struct ip)) /* minimum header length */ 1504 return IPPROTO_DONE; 1505 1506 /* The full IP header must reside completely in the one mbuf. */ 1507 if (m->m_len < hoff + iphlen) 1508 return IPPROTO_DONE; 1509 1510 iplen = ntohs(ip->ip_len); 1511 1512 /* 1513 * Check that the amount of data in the buffers is as 1514 * at least much as the IP header would have us expect. 1515 */ 1516 if (m->m_pkthdr.len < hoff + iplen) 1517 return IPPROTO_DONE; 1518 1519 /* 1520 * Ignore IP fragments. 1521 */ 1522 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) 1523 return IPPROTO_DONE; 1524 1525 /* 1526 * The TCP/IP or UDP/IP header must be entirely contained within 1527 * the first fragment of a packet. 1528 */ 1529 switch (ip->ip_p) { 1530 case IPPROTO_TCP: 1531 if (iplen < iphlen + sizeof(struct tcphdr)) 1532 return IPPROTO_DONE; 1533 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) 1534 return IPPROTO_DONE; 1535 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); 1536 thoff = th->th_off << 2; 1537 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) 1538 return IPPROTO_DONE; 1539 if (m->m_len < hoff + iphlen + thoff) 1540 return IPPROTO_DONE; 1541 break; 1542 case IPPROTO_UDP: 1543 if (iplen < iphlen + sizeof(struct udphdr)) 1544 return IPPROTO_DONE; 1545 if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) 1546 return IPPROTO_DONE; 1547 break; 1548 default: 1549 if (iplen < iphlen) 1550 return IPPROTO_DONE; 1551 break; 1552 } 1553 return ip->ip_p; 1554} 1555 1556static device_method_t netvsc_methods[] = { 1557 /* Device interface */ 1558 DEVMETHOD(device_probe, netvsc_probe), 1559 DEVMETHOD(device_attach, netvsc_attach), 1560 DEVMETHOD(device_detach, netvsc_detach), 1561 DEVMETHOD(device_shutdown, netvsc_shutdown), 1562 1563 { 0, 0 } 1564}; 1565 1566static driver_t netvsc_driver = { 1567 NETVSC_DEVNAME, 1568 netvsc_methods, 1569 sizeof(hn_softc_t) 1570}; 1571 1572static devclass_t netvsc_devclass; 1573 1574DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0); 1575MODULE_VERSION(hn, 1); 1576MODULE_DEPEND(hn, vmbus, 1, 1, 1); 1577