1283625Sdim/*- 2283625Sdim * Copyright (c) 2010-2012 Citrix Inc. 3283625Sdim * Copyright (c) 2009-2012 Microsoft Corp. 4283625Sdim * Copyright (c) 2012 NetApp Inc. 5283625Sdim * All rights reserved. 6283625Sdim * 7283625Sdim * Redistribution and use in source and binary forms, with or without 8283625Sdim * modification, are permitted provided that the following conditions 9283625Sdim * are met: 10283625Sdim * 1. Redistributions of source code must retain the above copyright 11283625Sdim * notice unmodified, this list of conditions, and the following 12283625Sdim * disclaimer. 13283625Sdim * 2. Redistributions in binary form must reproduce the above copyright 14283625Sdim * notice, this list of conditions and the following disclaimer in the 15284236Sdim * documentation and/or other materials provided with the distribution. 16284236Sdim * 17283625Sdim * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18296417Sdim * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19283625Sdim * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20284236Sdim * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21296417Sdim * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22284236Sdim * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23284236Sdim * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24284236Sdim * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25283625Sdim * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26296417Sdim * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27296417Sdim */ 28296417Sdim 29296417Sdim/*- 30296417Sdim * Copyright (c) 2004-2006 Kip Macy 31296417Sdim * All rights reserved. 
32296417Sdim * 33296417Sdim * Redistribution and use in source and binary forms, with or without 34296417Sdim * modification, are permitted provided that the following conditions 35296417Sdim * are met: 36296417Sdim * 1. Redistributions of source code must retain the above copyright 37296417Sdim * notice, this list of conditions and the following disclaimer. 38296417Sdim * 2. Redistributions in binary form must reproduce the above copyright 39296417Sdim * notice, this list of conditions and the following disclaimer in the 40296417Sdim * documentation and/or other materials provided with the distribution. 41296417Sdim * 42296417Sdim * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 43296417Sdim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44296417Sdim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45284236Sdim * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 46284236Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 47284236Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 48296417Sdim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 49283625Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 50283625Sdim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 51284236Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52284236Sdim * SUCH DAMAGE. 
53284236Sdim */ 54284236Sdim 55284236Sdim#include <sys/cdefs.h> 56284236Sdim__FBSDID("$FreeBSD: releng/10.3/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c 295948 2016-02-24 01:30:50Z sephe $"); 57284236Sdim 58296417Sdim#include "opt_inet6.h" 59296417Sdim#include "opt_inet.h" 60296417Sdim 61296417Sdim#include <sys/param.h> 62296417Sdim#include <sys/systm.h> 63296417Sdim#include <sys/sockio.h> 64296417Sdim#include <sys/mbuf.h> 65296417Sdim#include <sys/malloc.h> 66296417Sdim#include <sys/module.h> 67296417Sdim#include <sys/kernel.h> 68296417Sdim#include <sys/socket.h> 69296417Sdim#include <sys/queue.h> 70296417Sdim#include <sys/lock.h> 71296417Sdim#include <sys/sx.h> 72296417Sdim#include <sys/sysctl.h> 73296417Sdim 74296417Sdim#include <net/if.h> 75296417Sdim#include <net/if_arp.h> 76296417Sdim#include <net/ethernet.h> 77296417Sdim#include <net/if_dl.h> 78296417Sdim#include <net/if_media.h> 79296417Sdim 80296417Sdim#include <net/bpf.h> 81296417Sdim 82296417Sdim#include <net/if_types.h> 83296417Sdim#include <net/if_vlan_var.h> 84296417Sdim#include <net/if.h> 85296417Sdim 86296417Sdim#include <netinet/in_systm.h> 87296417Sdim#include <netinet/in.h> 88296417Sdim#include <netinet/ip.h> 89296417Sdim#include <netinet/if_ether.h> 90296417Sdim#include <netinet/tcp.h> 91296417Sdim#include <netinet/udp.h> 92284734Sdim#include <netinet/ip6.h> 93284734Sdim 94284734Sdim#include <vm/vm.h> 95284734Sdim#include <vm/vm_param.h> 96284734Sdim#include <vm/vm_kern.h> 97284734Sdim#include <vm/pmap.h> 98284734Sdim 99296417Sdim#include <machine/bus.h> 100296417Sdim#include <machine/resource.h> 101284734Sdim#include <machine/frame.h> 102309124Sdim#include <machine/vmparam.h> 103309124Sdim 104284734Sdim#include <sys/bus.h> 105284734Sdim#include <sys/rman.h> 106284734Sdim#include <sys/mutex.h> 107296417Sdim#include <sys/errno.h> 108296417Sdim#include <sys/types.h> 109296417Sdim#include <machine/atomic.h> 110296417Sdim 111296417Sdim#include <machine/intr_machdep.h> 112296417Sdim 
113296417Sdim#include <machine/in_cksum.h> 114284236Sdim 115284236Sdim#include <dev/hyperv/include/hyperv.h> 116284236Sdim#include "hv_net_vsc.h" 117284236Sdim#include "hv_rndis.h" 118284236Sdim#include "hv_rndis_filter.h" 119284236Sdim 120284236Sdim 121284236Sdim/* Short for Hyper-V network interface */ 122284236Sdim#define NETVSC_DEVNAME "hn" 123284236Sdim 124284236Sdim/* 125284236Sdim * It looks like offset 0 of buf is reserved to hold the softc pointer. 126284236Sdim * The sc pointer evidently not needed, and is not presently populated. 127284236Sdim * The packet offset is where the netvsc_packet starts in the buffer. 128296417Sdim */ 129296417Sdim#define HV_NV_SC_PTR_OFFSET_IN_BUF 0 130296417Sdim#define HV_NV_PACKET_OFFSET_IN_BUF 16 131296417Sdim 132296417Sdim/* YYY should get it from the underlying channel */ 133296417Sdim#define HN_TX_DESC_CNT 512 134296417Sdim 135296417Sdim#define HN_RNDIS_MSG_LEN \ 136296417Sdim (sizeof(rndis_msg) + \ 137296417Sdim RNDIS_VLAN_PPI_SIZE + \ 138296417Sdim RNDIS_TSO_PPI_SIZE + \ 139296417Sdim RNDIS_CSUM_PPI_SIZE) 140296417Sdim#define HN_RNDIS_MSG_BOUNDARY PAGE_SIZE 141296417Sdim#define HN_RNDIS_MSG_ALIGN CACHE_LINE_SIZE 142296417Sdim 143296417Sdim#define HN_TX_DATA_BOUNDARY PAGE_SIZE 144296417Sdim#define HN_TX_DATA_MAXSIZE IP_MAXPACKET 145296417Sdim#define HN_TX_DATA_SEGSIZE PAGE_SIZE 146296417Sdim#define HN_TX_DATA_SEGCNT_MAX \ 147296417Sdim (NETVSC_PACKET_MAXPAGE - HV_RF_NUM_TX_RESERVED_PAGE_BUFS) 148296417Sdim 149284236Sdimstruct hn_txdesc { 150284236Sdim SLIST_ENTRY(hn_txdesc) link; 151284236Sdim struct mbuf *m; 152284236Sdim struct hn_softc *sc; 153284236Sdim int refs; 154284236Sdim uint32_t flags; /* HN_TXD_FLAG_ */ 155284236Sdim netvsc_packet netvsc_pkt; /* XXX to be removed */ 156284236Sdim 157284236Sdim bus_dmamap_t data_dmap; 158284236Sdim 159284236Sdim bus_addr_t rndis_msg_paddr; 160284236Sdim rndis_msg *rndis_msg; 161296417Sdim bus_dmamap_t rndis_msg_dmap; 162296417Sdim}; 163309124Sdim 164309124Sdim#define 
HN_TXD_FLAG_ONLIST 0x1 165296417Sdim#define HN_TXD_FLAG_DMAMAP 0x2 166296417Sdim 167296417Sdim/* 168283625Sdim * A unified flag for all outbound check sum flags is useful, 169283625Sdim * and it helps avoiding unnecessary check sum calculation in 170283625Sdim * network forwarding scenario. 171283625Sdim */ 172283625Sdim#define HV_CSUM_FOR_OUTBOUND \ 173283625Sdim (CSUM_IP|CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP|CSUM_IP_TSO| \ 174283625Sdim CSUM_IP_ISCSI|CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP| \ 175283625Sdim CSUM_IP6_TSO|CSUM_IP6_ISCSI) 176283625Sdim 177283625Sdim/* XXX move to netinet/tcp_lro.h */ 178283625Sdim#define HN_LRO_HIWAT_MAX 65535 179283625Sdim#define HN_LRO_HIWAT_DEF HN_LRO_HIWAT_MAX 180283625Sdim/* YYY 2*MTU is a bit rough, but should be good enough. */ 181283625Sdim#define HN_LRO_HIWAT_MTULIM(ifp) (2 * (ifp)->if_mtu) 182283625Sdim#define HN_LRO_HIWAT_ISVALID(sc, hiwat) \ 183283625Sdim ((hiwat) >= HN_LRO_HIWAT_MTULIM((sc)->hn_ifp) || \ 184283625Sdim (hiwat) <= HN_LRO_HIWAT_MAX) 185283625Sdim 186283625Sdim/* 187283625Sdim * Be aware that this sleepable mutex will exhibit WITNESS errors when 188283625Sdim * certain TCP and ARP code paths are taken. This appears to be a 189283625Sdim * well-known condition, as all other drivers checked use a sleeping 190283625Sdim * mutex to protect their transmit paths. 191283625Sdim * Also Be aware that mutexes do not play well with semaphores, and there 192283625Sdim * is a conflicting semaphore in a certain channel code path. 
193283625Sdim */ 194309124Sdim#define NV_LOCK_INIT(_sc, _name) \ 195309124Sdim mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF) 196309124Sdim#define NV_LOCK(_sc) mtx_lock(&(_sc)->hn_lock) 197309124Sdim#define NV_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->hn_lock, MA_OWNED) 198309124Sdim#define NV_UNLOCK(_sc) mtx_unlock(&(_sc)->hn_lock) 199309124Sdim#define NV_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->hn_lock) 200309124Sdim 201309124Sdim 202309124Sdim/* 203309124Sdim * Globals 204309124Sdim */ 205309124Sdim 206309124Sdimint hv_promisc_mode = 0; /* normal mode by default */ 207309124Sdim 208309124Sdim/* Trust tcp segements verification on host side. */ 209309124Sdimstatic int hn_trust_hosttcp = 0; 210309124SdimTUNABLE_INT("dev.hn.trust_hosttcp", &hn_trust_hosttcp); 211309124Sdim 212309124Sdim#if __FreeBSD_version >= 1100045 213309124Sdim/* Limit TSO burst size */ 214309124Sdimstatic int hn_tso_maxlen = 0; 215309124SdimTUNABLE_INT("dev.hn.tso_maxlen", &hn_tso_maxlen); 216309124Sdim#endif 217309124Sdim 218309124Sdim/* Limit chimney send size */ 219309124Sdimstatic int hn_tx_chimney_size = 0; 220309124SdimTUNABLE_INT("dev.hn.tx_chimney_size", &hn_tx_chimney_size); 221309124Sdim 222309124Sdim/* 223309124Sdim * Forward declarations 224309124Sdim */ 225309124Sdimstatic void hn_stop(hn_softc_t *sc); 226309124Sdimstatic void hn_ifinit_locked(hn_softc_t *sc); 227309124Sdimstatic void hn_ifinit(void *xsc); 228309124Sdimstatic int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); 229309124Sdimstatic void hn_start_locked(struct ifnet *ifp); 230309124Sdimstatic void hn_start(struct ifnet *ifp); 231309124Sdimstatic int hn_ifmedia_upd(struct ifnet *ifp); 232309124Sdimstatic void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); 233309124Sdim#ifdef HN_LRO_HIWAT 234309124Sdimstatic int hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS); 235309124Sdim#endif 236309124Sdimstatic int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS); 237309124Sdimstatic int hn_check_iplen(const struct 
mbuf *, int); 238309124Sdimstatic int hn_create_tx_ring(struct hn_softc *sc); 239309124Sdimstatic void hn_destroy_tx_ring(struct hn_softc *sc); 240309124Sdim 241309124Sdimstatic __inline void 242309124Sdimhn_set_lro_hiwat(struct hn_softc *sc, int hiwat) 243309124Sdim{ 244309124Sdim sc->hn_lro_hiwat = hiwat; 245309124Sdim#ifdef HN_LRO_HIWAT 246309124Sdim sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat; 247309124Sdim#endif 248309124Sdim} 249284236Sdim 250284236Sdim/* 251284236Sdim * NetVsc get message transport protocol type 252284236Sdim */ 253284236Sdimstatic uint32_t get_transport_proto_type(struct mbuf *m_head) 254284236Sdim{ 255284236Sdim uint32_t ret_val = TRANSPORT_TYPE_NOT_IP; 256284236Sdim uint16_t ether_type = 0; 257284236Sdim int ether_len = 0; 258284236Sdim struct ether_vlan_header *eh; 259284236Sdim#ifdef INET 260284236Sdim struct ip *iph; 261284236Sdim#endif 262284236Sdim#ifdef INET6 263284236Sdim struct ip6_hdr *ip6; 264284236Sdim#endif 265284236Sdim 266284236Sdim eh = mtod(m_head, struct ether_vlan_header*); 267283625Sdim if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 268283625Sdim ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 269283625Sdim ether_type = eh->evl_proto; 270283625Sdim } else { 271283625Sdim ether_len = ETHER_HDR_LEN; 272283625Sdim ether_type = eh->evl_encap_proto; 273283625Sdim } 274283625Sdim 275283625Sdim switch (ntohs(ether_type)) { 276283625Sdim#ifdef INET6 277283625Sdim case ETHERTYPE_IPV6: 278283625Sdim ip6 = (struct ip6_hdr *)(m_head->m_data + ether_len); 279283625Sdim 280283625Sdim if (IPPROTO_TCP == ip6->ip6_nxt) { 281283625Sdim ret_val = TRANSPORT_TYPE_IPV6_TCP; 282283625Sdim } else if (IPPROTO_UDP == ip6->ip6_nxt) { 283283625Sdim ret_val = TRANSPORT_TYPE_IPV6_UDP; 284283625Sdim } 285283625Sdim break; 286283625Sdim#endif 287283625Sdim#ifdef INET 288283625Sdim case ETHERTYPE_IP: 289283625Sdim iph = (struct ip *)(m_head->m_data + ether_len); 290283625Sdim 291283625Sdim if (IPPROTO_TCP == iph->ip_p) { 292283625Sdim ret_val = 
TRANSPORT_TYPE_IPV4_TCP; 293283625Sdim } else if (IPPROTO_UDP == iph->ip_p) { 294283625Sdim ret_val = TRANSPORT_TYPE_IPV4_UDP; 295283625Sdim } 296283625Sdim break; 297283625Sdim#endif 298283625Sdim default: 299283625Sdim ret_val = TRANSPORT_TYPE_NOT_IP; 300283625Sdim break; 301283625Sdim } 302283625Sdim 303283625Sdim return (ret_val); 304283625Sdim} 305283625Sdim 306283625Sdimstatic int 307283625Sdimhn_ifmedia_upd(struct ifnet *ifp __unused) 308283625Sdim{ 309283625Sdim 310283625Sdim return EOPNOTSUPP; 311283625Sdim} 312283625Sdim 313283625Sdimstatic void 314284236Sdimhn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 315283625Sdim{ 316284236Sdim struct hn_softc *sc = ifp->if_softc; 317284236Sdim 318284236Sdim ifmr->ifm_status = IFM_AVALID; 319284236Sdim ifmr->ifm_active = IFM_ETHER; 320284236Sdim 321284236Sdim if (!sc->hn_carrier) { 322283625Sdim ifmr->ifm_active |= IFM_NONE; 323283625Sdim return; 324283625Sdim } 325283625Sdim ifmr->ifm_status |= IFM_ACTIVE; 326283625Sdim ifmr->ifm_active |= IFM_10G_T | IFM_FDX; 327283625Sdim} 328283625Sdim 329283625Sdim/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ 330283625Sdimstatic const hv_guid g_net_vsc_device_type = { 331283625Sdim .data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 332283625Sdim 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} 333296417Sdim}; 334296417Sdim 335296417Sdim/* 336296417Sdim * Standard probe entry point. 337296417Sdim * 338296417Sdim */ 339296417Sdimstatic int 340296417Sdimnetvsc_probe(device_t dev) 341296417Sdim{ 342296417Sdim const char *p; 343296417Sdim 344296417Sdim p = vmbus_get_type(dev); 345296417Sdim if (!memcmp(p, &g_net_vsc_device_type.data, sizeof(hv_guid))) { 346296417Sdim device_set_desc(dev, "Synthetic Network Interface"); 347296417Sdim if (bootverbose) 348296417Sdim printf("Netvsc probe... 
DONE \n"); 349296417Sdim 350296417Sdim return (BUS_PROBE_DEFAULT); 351296417Sdim } 352284236Sdim 353284236Sdim return (ENXIO); 354284236Sdim} 355284236Sdim 356284236Sdim/* 357284236Sdim * Standard attach entry point. 358284236Sdim * 359284236Sdim * Called when the driver is loaded. It allocates needed resources, 360284236Sdim * and initializes the "hardware" and software. 361284236Sdim */ 362284236Sdimstatic int 363284236Sdimnetvsc_attach(device_t dev) 364284236Sdim{ 365284236Sdim struct hv_device *device_ctx = vmbus_get_devctx(dev); 366284236Sdim netvsc_device_info device_info; 367283625Sdim hn_softc_t *sc; 368283625Sdim int unit = device_get_unit(dev); 369283625Sdim struct ifnet *ifp = NULL; 370283625Sdim struct sysctl_oid_list *child; 371283625Sdim struct sysctl_ctx_list *ctx; 372283625Sdim int error; 373283625Sdim#if __FreeBSD_version >= 1100045 374283625Sdim int tso_maxlen; 375284236Sdim#endif 376284236Sdim 377284236Sdim sc = device_get_softc(dev); 378284236Sdim if (sc == NULL) { 379284236Sdim return (ENOMEM); 380284236Sdim } 381284236Sdim 382284236Sdim bzero(sc, sizeof(hn_softc_t)); 383284236Sdim sc->hn_unit = unit; 384284236Sdim sc->hn_dev = dev; 385284236Sdim sc->hn_lro_hiwat = HN_LRO_HIWAT_DEF; 386284236Sdim sc->hn_trust_hosttcp = hn_trust_hosttcp; 387284236Sdim 388284236Sdim error = hn_create_tx_ring(sc); 389284236Sdim if (error) 390284236Sdim goto failed; 391284236Sdim 392284236Sdim NV_LOCK_INIT(sc, "NetVSCLock"); 393284236Sdim 394284236Sdim sc->hn_dev_obj = device_ctx; 395284236Sdim 396284236Sdim ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER); 397284236Sdim ifp->if_softc = sc; 398284236Sdim 399284236Sdim if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 400284236Sdim ifp->if_dunit = unit; 401283625Sdim ifp->if_dname = NETVSC_DEVNAME; 402283625Sdim 403283625Sdim ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 404283625Sdim ifp->if_ioctl = hn_ioctl; 405283625Sdim ifp->if_start = hn_start; 406283625Sdim ifp->if_init = 
hn_ifinit; 407283625Sdim /* needed by hv_rf_on_device_add() code */ 408296417Sdim ifp->if_mtu = ETHERMTU; 409296417Sdim IFQ_SET_MAXLEN(&ifp->if_snd, 512); 410296417Sdim ifp->if_snd.ifq_drv_maxlen = 511; 411296417Sdim IFQ_SET_READY(&ifp->if_snd); 412296417Sdim 413296417Sdim ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); 414296417Sdim ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); 415284236Sdim ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); 416284236Sdim /* XXX ifmedia_set really should do this for us */ 417284236Sdim sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; 418284236Sdim 419284236Sdim /* 420284236Sdim * Tell upper layers that we support full VLAN capability. 421284236Sdim */ 422284236Sdim ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); 423284236Sdim ifp->if_capabilities |= 424284236Sdim IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | 425284236Sdim IFCAP_LRO; 426284236Sdim ifp->if_capenable |= 427283625Sdim IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | 428283625Sdim IFCAP_LRO; 429283625Sdim /* 430283625Sdim * Only enable UDP checksum offloading when it is on 2012R2 or 431283625Sdim * later. UDP checksum offloading doesn't work on earlier 432283625Sdim * Windows releases. 
433283625Sdim */ 434296417Sdim if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1) 435296417Sdim ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 436296417Sdim else 437296417Sdim ifp->if_hwassist = CSUM_TCP | CSUM_TSO; 438296417Sdim 439284236Sdim error = hv_rf_on_device_add(device_ctx, &device_info); 440284236Sdim if (error) 441284236Sdim goto failed; 442284236Sdim 443284236Sdim if (device_info.link_state == 0) { 444284236Sdim sc->hn_carrier = 1; 445284236Sdim } 446284236Sdim 447284236Sdim#if defined(INET) || defined(INET6) 448283625Sdim tcp_lro_init(&sc->hn_lro); 449283625Sdim /* Driver private LRO settings */ 450283625Sdim sc->hn_lro.ifp = ifp; 451283625Sdim#ifdef HN_LRO_HIWAT 452283625Sdim sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat; 453283625Sdim#endif 454283625Sdim#endif /* INET || INET6 */ 455283625Sdim 456283625Sdim#if __FreeBSD_version >= 1100045 457283625Sdim tso_maxlen = hn_tso_maxlen; 458283625Sdim if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET) 459283625Sdim tso_maxlen = IP_MAXPACKET; 460309124Sdim 461309124Sdim ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; 462309124Sdim ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 463309124Sdim ifp->if_hw_tsomax = tso_maxlen - 464309124Sdim (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 465309124Sdim#endif 466296417Sdim 467296417Sdim ether_ifattach(ifp, device_info.mac_addr); 468296417Sdim 469296417Sdim#if __FreeBSD_version >= 1100045 470296417Sdim if_printf(ifp, "TSO: %u/%u/%u\n", ifp->if_hw_tsomax, 471296417Sdim ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); 472283625Sdim#endif 473296417Sdim 474296417Sdim sc->hn_tx_chimney_max = sc->net_dev->send_section_size; 475296417Sdim sc->hn_tx_chimney_size = sc->hn_tx_chimney_max; 476296417Sdim if (hn_tx_chimney_size > 0 && 477283625Sdim hn_tx_chimney_size < sc->hn_tx_chimney_max) 478296417Sdim sc->hn_tx_chimney_size = hn_tx_chimney_size; 479309124Sdim 480309124Sdim ctx = device_get_sysctl_ctx(dev); 481309124Sdim child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 
482296417Sdim 483296417Sdim SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_queued", 484283625Sdim CTLFLAG_RW, &sc->hn_lro.lro_queued, 0, "LRO queued"); 485296417Sdim SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_flushed", 486296417Sdim CTLFLAG_RW, &sc->hn_lro.lro_flushed, 0, "LRO flushed"); 487296417Sdim SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "lro_tried", 488283625Sdim CTLFLAG_RW, &sc->hn_lro_tried, "# of LRO tries"); 489283625Sdim#ifdef HN_LRO_HIWAT 490283625Sdim SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_hiwat", 491283625Sdim CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_hiwat_sysctl, 492283625Sdim "I", "LRO high watermark"); 493283625Sdim#endif 494283625Sdim SYSCTL_ADD_INT(ctx, child, OID_AUTO, "trust_hosttcp", 495283625Sdim CTLFLAG_RW, &sc->hn_trust_hosttcp, 0, 496283625Sdim "Trust tcp segement verification on host side, " 497283625Sdim "when csum info is missing"); 498283625Sdim SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_ip", 499283625Sdim CTLFLAG_RW, &sc->hn_csum_ip, "RXCSUM IP"); 500283625Sdim SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_tcp", 501283625Sdim CTLFLAG_RW, &sc->hn_csum_tcp, "RXCSUM TCP"); 502284236Sdim SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_trusted", 503284236Sdim CTLFLAG_RW, &sc->hn_csum_trusted, 504284236Sdim "# of TCP segements that we trust host's csum verification"); 505284236Sdim SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "small_pkts", 506284236Sdim CTLFLAG_RW, &sc->hn_small_pkts, "# of small packets received"); 507284236Sdim SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_txdescs", 508284236Sdim CTLFLAG_RW, &sc->hn_no_txdescs, "# of times short of TX descs"); 509284236Sdim SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "send_failed", 510284236Sdim CTLFLAG_RW, &sc->hn_send_failed, "# of hyper-v sending failure"); 511284236Sdim SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "txdma_failed", 512284236Sdim CTLFLAG_RW, &sc->hn_txdma_failed, "# of TX DMA failure"); 513284236Sdim SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_collapsed", 514284236Sdim CTLFLAG_RW, 
&sc->hn_tx_collapsed, "# of TX mbuf collapsed"); 515284236Sdim SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_chimney", 516284236Sdim CTLFLAG_RW, &sc->hn_tx_chimney, "# of chimney send"); 517284236Sdim SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", 518284236Sdim CTLFLAG_RD, &sc->hn_txdesc_cnt, 0, "# of total TX descs"); 519284236Sdim SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", 520284236Sdim CTLFLAG_RD, &sc->hn_txdesc_avail, 0, "# of available TX descs"); 521284236Sdim SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", 522284236Sdim CTLFLAG_RD, &sc->hn_tx_chimney_max, 0, 523284236Sdim "Chimney send packet size upper boundary"); 524284236Sdim SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", 525284236Sdim CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_tx_chimney_size_sysctl, 526284236Sdim "I", "Chimney send packet size limit"); 527284236Sdim 528283625Sdim if (unit == 0) { 529283625Sdim struct sysctl_ctx_list *dc_ctx; 530283625Sdim struct sysctl_oid_list *dc_child; 531283625Sdim devclass_t dc; 532283625Sdim 533283625Sdim /* 534283625Sdim * Add sysctl nodes for devclass 535283625Sdim */ 536283625Sdim dc = device_get_devclass(dev); 537283625Sdim dc_ctx = devclass_get_sysctl_ctx(dc); 538283625Sdim dc_child = SYSCTL_CHILDREN(devclass_get_sysctl_tree(dc)); 539283625Sdim 540283625Sdim SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "trust_hosttcp", 541283625Sdim CTLFLAG_RD, &hn_trust_hosttcp, 0, 542283625Sdim "Trust tcp segement verification on host side, " 543283625Sdim "when csum info is missing (global setting)"); 544284236Sdim SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "tx_chimney_size", 545284236Sdim CTLFLAG_RD, &hn_tx_chimney_size, 0, 546284236Sdim "Chimney send packet size limit"); 547284236Sdim#if __FreeBSD_version >= 1100045 548283625Sdim SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "tso_maxlen", 549283625Sdim CTLFLAG_RD, &hn_tso_maxlen, 0, "TSO burst limit"); 550284236Sdim#endif 551284236Sdim } 552284236Sdim 553284236Sdim return (0); 554283625Sdimfailed: 
555283625Sdim hn_destroy_tx_ring(sc); 556296417Sdim if (ifp != NULL) 557296417Sdim if_free(ifp); 558296417Sdim return (error); 559296417Sdim} 560296417Sdim 561296417Sdim/* 562296417Sdim * Standard detach entry point 563296417Sdim */ 564296417Sdimstatic int 565296417Sdimnetvsc_detach(device_t dev) 566296417Sdim{ 567296417Sdim struct hn_softc *sc = device_get_softc(dev); 568296417Sdim struct hv_device *hv_device = vmbus_get_devctx(dev); 569284236Sdim 570284236Sdim if (bootverbose) 571284236Sdim printf("netvsc_detach\n"); 572284236Sdim 573284236Sdim /* 574284236Sdim * XXXKYS: Need to clean up all our 575284236Sdim * driver state; this is the driver 576284236Sdim * unloading. 577284236Sdim */ 578284236Sdim 579284236Sdim /* 580283625Sdim * XXXKYS: Need to stop outgoing traffic and unregister 581283625Sdim * the netdevice. 582283625Sdim */ 583283625Sdim 584283625Sdim hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL); 585283625Sdim 586283625Sdim ifmedia_removeall(&sc->hn_media); 587283625Sdim#if defined(INET) || defined(INET6) 588283625Sdim tcp_lro_free(&sc->hn_lro); 589296417Sdim#endif 590296417Sdim hn_destroy_tx_ring(sc); 591296417Sdim 592296417Sdim return (0); 593296417Sdim} 594296417Sdim 595296417Sdim/* 596296417Sdim * Standard shutdown entry point 597296417Sdim */ 598296417Sdimstatic int 599296417Sdimnetvsc_shutdown(device_t dev) 600296417Sdim{ 601309124Sdim return (0); 602309124Sdim} 603309124Sdim 604309124Sdimstatic __inline int 605309124Sdimhn_txdesc_dmamap_load(struct hn_softc *sc, struct hn_txdesc *txd, 606309124Sdim struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) 607309124Sdim{ 608309124Sdim struct mbuf *m = *m_head; 609309124Sdim int error; 610309124Sdim 611309124Sdim error = bus_dmamap_load_mbuf_sg(sc->hn_tx_data_dtag, txd->data_dmap, 612309124Sdim m, segs, nsegs, BUS_DMA_NOWAIT); 613309124Sdim if (error == EFBIG) { 614309124Sdim struct mbuf *m_new; 615309124Sdim 616309124Sdim m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); 
617309124Sdim if (m_new == NULL) 618309124Sdim return ENOBUFS; 619309124Sdim else 620309124Sdim *m_head = m = m_new; 621309124Sdim sc->hn_tx_collapsed++; 622309124Sdim 623309124Sdim error = bus_dmamap_load_mbuf_sg(sc->hn_tx_data_dtag, 624309124Sdim txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); 625309124Sdim } 626309124Sdim if (!error) { 627309124Sdim bus_dmamap_sync(sc->hn_tx_data_dtag, txd->data_dmap, 628309124Sdim BUS_DMASYNC_PREWRITE); 629309124Sdim txd->flags |= HN_TXD_FLAG_DMAMAP; 630309124Sdim } 631309124Sdim return error; 632309124Sdim} 633309124Sdim 634309124Sdimstatic __inline void 635309124Sdimhn_txdesc_dmamap_unload(struct hn_softc *sc, struct hn_txdesc *txd) 636309124Sdim{ 637309124Sdim 638309124Sdim if (txd->flags & HN_TXD_FLAG_DMAMAP) { 639309124Sdim bus_dmamap_sync(sc->hn_tx_data_dtag, 640309124Sdim txd->data_dmap, BUS_DMASYNC_POSTWRITE); 641309124Sdim bus_dmamap_unload(sc->hn_tx_data_dtag, 642309124Sdim txd->data_dmap); 643309124Sdim txd->flags &= ~HN_TXD_FLAG_DMAMAP; 644309124Sdim } 645309124Sdim} 646309124Sdim 647309124Sdimstatic __inline int 648309124Sdimhn_txdesc_put(struct hn_softc *sc, struct hn_txdesc *txd) 649309124Sdim{ 650309124Sdim 651309124Sdim KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, 652309124Sdim ("put an onlist txd %#x", txd->flags)); 653309124Sdim 654309124Sdim KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 655309124Sdim if (atomic_fetchadd_int(&txd->refs, -1) != 1) 656309124Sdim return 0; 657309124Sdim 658309124Sdim hn_txdesc_dmamap_unload(sc, txd); 659309124Sdim if (txd->m != NULL) { 660309124Sdim m_freem(txd->m); 661284236Sdim txd->m = NULL; 662284236Sdim } 663284236Sdim 664283625Sdim txd->flags |= HN_TXD_FLAG_ONLIST; 665283625Sdim 666284236Sdim mtx_lock_spin(&sc->hn_txlist_spin); 667284236Sdim KASSERT(sc->hn_txdesc_avail >= 0 && 668284236Sdim sc->hn_txdesc_avail < sc->hn_txdesc_cnt, 669284236Sdim ("txdesc_put: invalid txd avail %d", sc->hn_txdesc_avail)); 670283625Sdim sc->hn_txdesc_avail++; 671283625Sdim 
SLIST_INSERT_HEAD(&sc->hn_txlist, txd, link); 672296417Sdim mtx_unlock_spin(&sc->hn_txlist_spin); 673296417Sdim 674296417Sdim return 1; 675296417Sdim} 676296417Sdim 677296417Sdimstatic __inline struct hn_txdesc * 678296417Sdimhn_txdesc_get(struct hn_softc *sc) 679296417Sdim{ 680296417Sdim struct hn_txdesc *txd; 681296417Sdim 682296417Sdim mtx_lock_spin(&sc->hn_txlist_spin); 683296417Sdim txd = SLIST_FIRST(&sc->hn_txlist); 684296417Sdim if (txd != NULL) { 685296417Sdim KASSERT(sc->hn_txdesc_avail > 0, 686296417Sdim ("txdesc_get: invalid txd avail %d", sc->hn_txdesc_avail)); 687296417Sdim sc->hn_txdesc_avail--; 688296417Sdim SLIST_REMOVE_HEAD(&sc->hn_txlist, link); 689296417Sdim } 690296417Sdim mtx_unlock_spin(&sc->hn_txlist_spin); 691296417Sdim 692296417Sdim if (txd != NULL) { 693309124Sdim KASSERT(txd->m == NULL && txd->refs == 0 && 694309124Sdim (txd->flags & HN_TXD_FLAG_ONLIST), ("invalid txd")); 695309124Sdim txd->flags &= ~HN_TXD_FLAG_ONLIST; 696309124Sdim txd->refs = 1; 697309124Sdim } 698309124Sdim return txd; 699309124Sdim} 700309124Sdim 701309124Sdimstatic __inline void 702309124Sdimhn_txdesc_hold(struct hn_txdesc *txd) 703309124Sdim{ 704309124Sdim 705309124Sdim /* 0->1 transition will never work */ 706309124Sdim KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs)); 707309124Sdim atomic_add_int(&txd->refs, 1); 708309124Sdim} 709309124Sdim 710309124Sdim/* 711309124Sdim * Send completion processing 712296417Sdim * 713284236Sdim * Note: It looks like offset 0 of buf is reserved to hold the softc 714284236Sdim * pointer. The sc pointer is not currently needed in this function, and 715284236Sdim * it is not presently populated by the TX function. 
 */
/*
 * Send-completion callback, invoked by the netvsc layer when a
 * previously submitted packet has been consumed by the host.
 * Recovers the tx descriptor stashed in the completion tid, flags
 * that a tx completion happened (so netvsc_channel_rollup() can
 * restart the interface), and releases the descriptor.
 */
void
netvsc_xmit_completion(void *context)
{
	netvsc_packet *packet = context;
	struct hn_txdesc *txd;
	struct hn_softc *sc;

	/* The txd pointer was smuggled through the 64-bit completion tid. */
	txd = (struct hn_txdesc *)(uintptr_t)
	    packet->compl.send.send_completion_tid;

	sc = txd->sc;
	sc->hn_txeof = 1;
	hn_txdesc_put(sc, txd);
}

/*
 * Channel rollup: called after a batch of channel events has been
 * processed.  If any tx completions arrived (hn_txeof set by
 * netvsc_xmit_completion()), clear OACTIVE and kick the transmit
 * path to drain the send queue.
 */
void
netvsc_channel_rollup(struct hv_device *device_ctx)
{
	struct hn_softc *sc = device_get_softc(device_ctx->device);
	struct ifnet *ifp;

	if (!sc->hn_txeof)
		return;

	sc->hn_txeof = 0;
	ifp = sc->hn_ifp;
	NV_LOCK(sc);
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	hn_start_locked(ifp);
	NV_UNLOCK(sc);
}

/*
 * Start a transmit of one or more packets.
 *
 * For each dequeued mbuf chain: build the RNDIS packet message (plus
 * optional VLAN / checksum-offload / TSO per-packet-info), then send
 * either by copying into a "chimney" send-buffer section (small
 * packets) or by DMA-mapping the mbuf into page buffers.  On a send
 * failure the packet is retried once, then requeued and the interface
 * marked OACTIVE.
 *
 * NOTE(review): caller is expected to hold NV_LOCK (see hn_start() and
 * netvsc_channel_rollup()) — confirm against the rest of the file.
 */
static void
hn_start_locked(struct ifnet *ifp)
{
	hn_softc_t *sc = ifp->if_softc;
	struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
	netvsc_dev *net_dev = sc->net_dev;
	netvsc_packet *packet;
	struct mbuf *m_head, *m;
	struct ether_vlan_header *eh;
	rndis_msg *rndis_mesg;
	rndis_packet *rndis_pkt;
	rndis_per_packet_info *rppi;
	ndis_8021q_info *rppi_vlan_info;
	rndis_tcp_ip_csum_info *csum_info;
	rndis_tcp_tso_info *tso_info;
	int ether_len;
	uint32_t rndis_msg_size = 0;
	uint32_t trans_proto_type;
	uint32_t send_buf_section_idx =
	    NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
		int error, nsegs, i, send_failed = 0;
		struct hn_txdesc *txd;

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;

		txd = hn_txdesc_get(sc);
		if (txd == NULL) {
			/* Out of descriptors: requeue and stall TX. */
			sc->hn_no_txdescs++;
			IF_PREPEND(&ifp->if_snd, m_head);
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}

		packet = &txd->netvsc_pkt;
		/* XXX not necessary */
		memset(packet, 0, sizeof(*packet));

		packet->is_data_pkt = TRUE;

		/* Initialize it from the mbuf */
		packet->tot_data_buf_len = m_head->m_pkthdr.len;

		/*
		 * extension points to the area reserved for the
		 * rndis_filter_packet, which is placed just after
		 * the netvsc_packet (and rppi struct, if present;
		 * length is updated later).
		 */
		rndis_mesg = txd->rndis_msg;
		/* XXX not necessary */
		memset(rndis_mesg, 0, HN_RNDIS_MSG_LEN);
		rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG;

		rndis_pkt = &rndis_mesg->msg.packet;
		rndis_pkt->data_offset = sizeof(rndis_packet);
		rndis_pkt->data_length = packet->tot_data_buf_len;
		rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet);

		rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet);

		/*
		 * If the Hyper-V infrastructure needs to embed a VLAN tag,
		 * initialize netvsc_packet and rppi struct values as needed.
		 */
		if (m_head->m_flags & M_VLANTAG) {
			/*
			 * set up some additional fields so the Hyper-V
			 * infrastructure will stuff the VLAN tag into the
			 * frame.
			 */
			rndis_msg_size += RNDIS_VLAN_PPI_SIZE;

			rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE,
			    ieee_8021q_info);

			/* VLAN info immediately follows rppi struct */
			rppi_vlan_info = (ndis_8021q_info *)((char*)rppi +
			    rppi->per_packet_info_offset);
			/* FreeBSD does not support CFI or priority */
			rppi_vlan_info->u1.s1.vlan_id =
			    m_head->m_pkthdr.ether_vtag & 0xfff;
		}

		/*
		 * Only check the flags for outbound and ignore the ones
		 * for inbound.
		 */
		if (0 == (m_head->m_pkthdr.csum_flags & HV_CSUM_FOR_OUTBOUND)) {
			goto pre_send;
		}

		eh = mtod(m_head, struct ether_vlan_header*);
		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
			ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		} else {
			ether_len = ETHER_HDR_LEN;
		}

		trans_proto_type = get_transport_proto_type(m_head);
		if (TRANSPORT_TYPE_NOT_IP == trans_proto_type) {
			goto pre_send;
		}

		/*
		 * TSO packet needless to setup the send side checksum
		 * offload.
		 */
		if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
			goto do_tso;
		}

		/* setup checksum offload */
		rndis_msg_size += RNDIS_CSUM_PPI_SIZE;
		rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE,
		    tcpip_chksum_info);
		csum_info = (rndis_tcp_ip_csum_info *)((char*)rppi +
		    rppi->per_packet_info_offset);

		/*
		 * NOTE(review): the upper 16 bits of trans_proto_type
		 * apparently encode the network protocol (IPv4/IPv6) and
		 * the lower bits the transport (TCP/UDP) — confirm against
		 * get_transport_proto_type().
		 */
		if (trans_proto_type & (TYPE_IPV4 << 16)) {
			csum_info->xmit.is_ipv4 = 1;
		} else {
			csum_info->xmit.is_ipv6 = 1;
		}

		if (trans_proto_type & TYPE_TCP) {
			csum_info->xmit.tcp_csum = 1;
			csum_info->xmit.tcp_header_offset = 0;
		} else if (trans_proto_type & TYPE_UDP) {
			csum_info->xmit.udp_csum = 1;
		}

		goto pre_send;

do_tso:
		/* setup TCP segmentation offload */
		rndis_msg_size += RNDIS_TSO_PPI_SIZE;
		rppi = hv_set_rppi_data(rndis_mesg, RNDIS_TSO_PPI_SIZE,
		    tcp_large_send_info);

		tso_info = (rndis_tcp_tso_info *)((char *)rppi +
		    rppi->per_packet_info_offset);
		tso_info->lso_v2_xmit.type =
		    RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;

#ifdef INET
		if (trans_proto_type & (TYPE_IPV4 << 16)) {
			struct ip *ip =
			    (struct ip *)(m_head->m_data + ether_len);
			unsigned long iph_len = ip->ip_hl << 2;
			struct tcphdr *th =
			    (struct tcphdr *)((caddr_t)ip + iph_len);

			tso_info->lso_v2_xmit.ip_version =
			    RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
			/*
			 * Host recomputes length/checksums for TSO; zero
			 * them and seed the TCP pseudo-header checksum.
			 */
			ip->ip_len = 0;
			ip->ip_sum = 0;

			th->th_sum = in_pseudo(ip->ip_src.s_addr,
			    ip->ip_dst.s_addr,
			    htons(IPPROTO_TCP));
		}
#endif
#if defined(INET6) && defined(INET)
		else
#endif
#ifdef INET6
		{
			struct ip6_hdr *ip6 =
			    (struct ip6_hdr *)(m_head->m_data + ether_len);
			struct tcphdr *th = (struct tcphdr *)(ip6 + 1);

			tso_info->lso_v2_xmit.ip_version =
			    RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
			ip6->ip6_plen = 0;
			th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		}
#endif
		tso_info->lso_v2_xmit.tcp_header_offset = 0;
		tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz;

pre_send:
		rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size;
		packet->tot_data_buf_len = rndis_mesg->msg_len;

		/* send packet with send buffer */
		if (packet->tot_data_buf_len < sc->hn_tx_chimney_size) {
			send_buf_section_idx =
			    hv_nv_get_next_send_section(net_dev);
			if (send_buf_section_idx !=
			    NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) {
				char *dest = ((char *)net_dev->send_buf +
				    send_buf_section_idx *
				    net_dev->send_section_size);

				/* RNDIS header first, then the mbuf data. */
				memcpy(dest, rndis_mesg, rndis_msg_size);
				dest += rndis_msg_size;
				for (m = m_head; m != NULL; m = m->m_next) {
					if (m->m_len) {
						memcpy(dest,
						    (void *)mtod(m, vm_offset_t),
						    m->m_len);
						dest += m->m_len;
					}
				}

				packet->send_buf_section_idx =
				    send_buf_section_idx;
				packet->send_buf_section_size =
				    packet->tot_data_buf_len;
				packet->page_buf_count = 0;
				sc->hn_tx_chimney++;
				goto do_send;
			}
		}

		error = hn_txdesc_dmamap_load(sc, txd, &m_head, segs, &nsegs);
		if (error) {
			int freed;

			/*
			 * This mbuf is not linked w/ the txd yet, so free
			 * it now.
			 */
			m_freem(m_head);
			freed = hn_txdesc_put(sc, txd);
			KASSERT(freed != 0,
			    ("fail to free txd upon txdma error"));

			sc->hn_txdma_failed++;
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			continue;
		}

		packet->page_buf_count = nsegs +
		    HV_RF_NUM_TX_RESERVED_PAGE_BUFS;

		/* send packet with page buffer */
		packet->page_buffers[0].pfn = atop(txd->rndis_msg_paddr);
		packet->page_buffers[0].offset =
		    txd->rndis_msg_paddr & PAGE_MASK;
		packet->page_buffers[0].length = rndis_msg_size;

		/*
		 * Fill the page buffers with mbuf info starting at index
		 * HV_RF_NUM_TX_RESERVED_PAGE_BUFS.
		 */
		for (i = 0; i < nsegs; ++i) {
			hv_vmbus_page_buffer *pb = &packet->page_buffers[
			    i + HV_RF_NUM_TX_RESERVED_PAGE_BUFS];

			pb->pfn = atop(segs[i].ds_addr);
			pb->offset = segs[i].ds_addr & PAGE_MASK;
			pb->length = segs[i].ds_len;
		}

		packet->send_buf_section_idx =
		    NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
		packet->send_buf_section_size = 0;

do_send:
		txd->m = m_head;

		/* Set the completion routine */
		packet->compl.send.on_send_completion = netvsc_xmit_completion;
		packet->compl.send.send_completion_context = packet;
		packet->compl.send.send_completion_tid =
		    (uint64_t)(uintptr_t)txd;

again:
		/*
		 * Make sure that txd is not freed before ETHER_BPF_MTAP.
		 */
		hn_txdesc_hold(txd);
		error = hv_nv_on_send(device_ctx, packet);
		if (!error) {
			ETHER_BPF_MTAP(ifp, m_head);
			if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
		}
		hn_txdesc_put(sc, txd);

		if (__predict_false(error)) {
			int freed;

			/*
			 * This should "really rarely" happen.
			 *
			 * XXX Too many RX to be acked or too many sideband
			 * commands to run?  Ask netvsc_channel_rollup()
			 * to kick start later.
			 */
			sc->hn_txeof = 1;
			if (!send_failed) {
				sc->hn_send_failed++;
				send_failed = 1;
				/*
				 * Try sending again after set hn_txeof;
				 * in case that we missed the last
				 * netvsc_channel_rollup().
				 */
				goto again;
			}
			if_printf(ifp, "send failed\n");

			/*
			 * This mbuf will be prepended, don't free it
			 * in hn_txdesc_put(); only unload it from the
			 * DMA map in hn_txdesc_put(), if it was loaded.
			 */
			txd->m = NULL;
			freed = hn_txdesc_put(sc, txd);
			KASSERT(freed != 0,
			    ("fail to free txd upon send error"));

			sc->hn_send_failed++;
			IF_PREPEND(&ifp->if_snd, m_head);
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
	}
}

/*
 * Link up/down notification
 */
void
netvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status)
{
	hn_softc_t *sc = device_get_softc(device_obj->device);

	if (sc == NULL) {
		return;
	}

	/* status == 1 means link up; anything else means link down. */
	if (status == 1) {
		sc->hn_carrier = 1;
	} else {
		sc->hn_carrier = 0;
	}
}

/*
 * Append the specified data to the indicated mbuf chain,
 * Extend the mbuf chain if the new data does not fit in
 * existing space.
 *
 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c.
 * There should be an equivalent in the kernel mbuf code,
 * but there does not appear to be one yet.
 *
 * Differs from m_append() in that additional mbufs are
 * allocated with cluster size MJUMPAGESIZE, and filled
 * accordingly.
 *
 * Return 1 if able to complete the job; otherwise 0.
 */
static int
hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
{
	struct mbuf *m, *n;
	int remainder, space;

	/* Walk to the last mbuf in the chain. */
	for (m = m0; m->m_next != NULL; m = m->m_next)
		;
	remainder = len;
	space = M_TRAILINGSPACE(m);
	if (space > 0) {
		/*
		 * Copy into available space.
		 */
		if (space > remainder)
			space = remainder;
		bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
		m->m_len += space;
		cp += space;
		remainder -= space;
	}
	while (remainder > 0) {
		/*
		 * Allocate a new mbuf; could check space
		 * and allocate a cluster instead.
		 */
		n = m_getjcl(M_DONTWAIT, m->m_type, 0, MJUMPAGESIZE);
		if (n == NULL)
			break;
		n->m_len = min(MJUMPAGESIZE, remainder);
		bcopy(cp, mtod(n, caddr_t), n->m_len);
		cp += n->m_len;
		remainder -= n->m_len;
		m->m_next = n;
		m = n;
	}
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += len - remainder;

	return (remainder == 0);
}


/*
 * Called when we receive a data packet from the "wire" on the
 * specified device
 *
 * Note:  This is no longer used as a callback
 */
int
netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet,
    rndis_tcp_ip_csum_info *csum_info)
{
	hn_softc_t *sc = (hn_softc_t *)device_get_softc(device_ctx->device);
	struct mbuf *m_new;
	struct ifnet *ifp;
	device_t dev = device_ctx->device;
	int size, do_lro = 0;

	if (sc == NULL) {
		return (0); /* TODO: KYS how can this be! */
	}

	ifp = sc->hn_ifp;

	/*
	 * NOTE(review): this immediately overwrites the assignment above;
	 * presumably sc->arpcom.ac_ifp and sc->hn_ifp refer to the same
	 * ifnet — confirm and drop one of the two assignments.
	 */
	ifp = sc->arpcom.ac_ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		return (0);
	}

	/*
	 * Bail out if packet contains more data than configured MTU.
	 */
	if (packet->tot_data_buf_len > (ifp->if_mtu + ETHER_HDR_LEN)) {
		return (0);
	} else if (packet->tot_data_buf_len <= MHLEN) {
		/* Small packet: fits in a plain mbuf, copy directly. */
		m_new = m_gethdr(M_NOWAIT, MT_DATA);
		if (m_new == NULL)
			return (0);
		memcpy(mtod(m_new, void *), packet->data,
		    packet->tot_data_buf_len);
		m_new->m_pkthdr.len = m_new->m_len = packet->tot_data_buf_len;
		sc->hn_small_pkts++;
	} else {
		/*
		 * Get an mbuf with a cluster.  For packets 2K or less,
		 * get a standard 2K cluster.  For anything larger, get a
		 * 4K cluster.  Any buffers larger than 4K can cause problems
		 * if looped around to the Hyper-V TX channel, so avoid them.
		 */
		size = MCLBYTES;
		if (packet->tot_data_buf_len > MCLBYTES) {
			/* 4096 */
			size = MJUMPAGESIZE;
		}

		m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
		if (m_new == NULL) {
			device_printf(dev, "alloc mbuf failed.\n");
			return (0);
		}

		hv_m_append(m_new, packet->tot_data_buf_len, packet->data);
	}
	m_new->m_pkthdr.rcvif = ifp;

	/* receive side checksum offload */
	if (NULL != csum_info) {
		/* IP csum offload */
		if (csum_info->receive.ip_csum_succeeded) {
			m_new->m_pkthdr.csum_flags |=
			    (CSUM_IP_CHECKED | CSUM_IP_VALID);
			sc->hn_csum_ip++;
		}

		/* TCP csum offload */
		if (csum_info->receive.tcp_csum_succeeded) {
			m_new->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			m_new->m_pkthdr.csum_data = 0xffff;
			sc->hn_csum_tcp++;
		}

		if (csum_info->receive.ip_csum_succeeded &&
		    csum_info->receive.tcp_csum_succeeded)
			do_lro = 1;
	} else {
		/*
		 * No csum info from the host: parse the Ethernet/VLAN
		 * header ourselves and, for TCP/IPv4, optionally trust
		 * the host's checksum (hn_trust_hosttcp).
		 */
		const struct ether_header *eh;
		uint16_t etype;
		int hoff;

		hoff = sizeof(*eh);
		if (m_new->m_len < hoff)
			goto skip;
		eh = mtod(m_new, struct ether_header *);
		etype = ntohs(eh->ether_type);
		if (etype == ETHERTYPE_VLAN) {
			const struct ether_vlan_header *evl;

			hoff = sizeof(*evl);
			if (m_new->m_len < hoff)
				goto skip;
			evl = mtod(m_new, struct ether_vlan_header *);
			etype = ntohs(evl->evl_proto);
		}

		if (etype == ETHERTYPE_IP) {
			int pr;

			pr = hn_check_iplen(m_new, hoff);
			if (pr == IPPROTO_TCP) {
				if (sc->hn_trust_hosttcp) {
					sc->hn_csum_trusted++;
					m_new->m_pkthdr.csum_flags |=
					    (CSUM_IP_CHECKED | CSUM_IP_VALID |
					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
					m_new->m_pkthdr.csum_data = 0xffff;
				}
				/* Rely on SW csum verification though... */
				do_lro = 1;
			}
		}
	}
skip:
	if ((packet->vlan_tci != 0) &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) {
		m_new->m_pkthdr.ether_vtag = packet->vlan_tci;
		m_new->m_flags |= M_VLANTAG;
	}

	/*
	 * Note:  Moved RX completion back to hv_nv_on_receive() so all
	 * messages (not just data messages) will trigger a response.
	 */

	ifp->if_ipackets++;

	if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
#if defined(INET) || defined(INET6)
		struct lro_ctrl *lro = &sc->hn_lro;

		if (lro->lro_cnt) {
			sc->hn_lro_tried++;
			if (tcp_lro_rx(lro, m_new, 0) == 0) {
				/* DONE! */
				return 0;
			}
		}
#endif
	}

	/* We're not holding the lock here, so don't release it */
	(*ifp->if_input)(ifp, m_new);

	return (0);
}

/*
 * Flush any LRO-queued packets up the stack.  Called after a batch of
 * receives has been processed.
 */
void
netvsc_recv_rollup(struct hv_device *device_ctx)
{
#if defined(INET) || defined(INET6)
	hn_softc_t *sc = device_get_softc(device_ctx->device);
	struct lro_ctrl *lro = &sc->hn_lro;
	struct lro_entry *queued;

	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}
#endif
}

/*
 * Rules for using sc->temp_unusable:
 * 1.  sc->temp_unusable can only be read or written while holding NV_LOCK()
 * 2.  code reading sc->temp_unusable under NV_LOCK(), and finding
 *     sc->temp_unusable set, must release NV_LOCK() and exit
 * 3.  to retain exclusive control of the interface,
 *     sc->temp_unusable must be set by code before releasing NV_LOCK()
 * 4.  only code setting sc->temp_unusable can clear sc->temp_unusable
 * 5.  code setting sc->temp_unusable must eventually clear sc->temp_unusable
 */

/*
 * Standard ioctl entry point.  Called when the user wants to configure
 * the interface.
 */
static int
hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	hn_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	netvsc_device_info device_info;
	struct hv_device *hn_dev;
	int mask, error = 0;
	int retry_cnt = 500;

	switch(cmd) {

	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				hn_ifinit(sc);
			arp_ifinit(ifp, ifa);
		} else
#endif
		error = ether_ioctl(ifp, cmd, data);
		break;
	case SIOCSIFMTU:
		hn_dev = vmbus_get_devctx(sc->hn_dev);

		/* Check MTU value change */
		if (ifp->if_mtu == ifr->ifr_mtu)
			break;

		if (ifr->ifr_mtu > NETVSC_MAX_CONFIGURABLE_MTU) {
			error = EINVAL;
			break;
		}

		/* Obtain and record requested MTU */
		ifp->if_mtu = ifr->ifr_mtu;
		/*
		 * Make sure that LRO high watermark is still valid,
		 * after MTU change (the 2*MTU limit).
		 */
		if (!HN_LRO_HIWAT_ISVALID(sc, sc->hn_lro_hiwat))
			hn_set_lro_hiwat(sc, HN_LRO_HIWAT_MTULIM(ifp));

		/*
		 * Acquire exclusive control of the interface (see the
		 * temp_unusable rules above), retrying for up to
		 * 500 * 5ms = 2.5s.  retry_cnt == -1 means we got it.
		 */
		do {
			NV_LOCK(sc);
			if (!sc->temp_unusable) {
				sc->temp_unusable = TRUE;
				retry_cnt = -1;
			}
			NV_UNLOCK(sc);
			if (retry_cnt > 0) {
				retry_cnt--;
				DELAY(5 * 1000);
			}
		} while (retry_cnt > 0);

		if (retry_cnt == 0) {
			error = EINVAL;
			break;
		}

		/* We must remove and add back the device to cause the new
		 * MTU to take effect.  This includes tearing down, but not
		 * deleting the channel, then bringing it back up.
		 */
		error = hv_rf_on_device_remove(hn_dev, HV_RF_NV_RETAIN_CHANNEL);
		if (error) {
			NV_LOCK(sc);
			sc->temp_unusable = FALSE;
			NV_UNLOCK(sc);
			break;
		}
		error = hv_rf_on_device_add(hn_dev, &device_info);
		if (error) {
			NV_LOCK(sc);
			sc->temp_unusable = FALSE;
			NV_UNLOCK(sc);
			break;
		}

		/* Re-clamp chimney size: send sections may have changed. */
		sc->hn_tx_chimney_max = sc->net_dev->send_section_size;
		if (sc->hn_tx_chimney_size > sc->hn_tx_chimney_max)
			sc->hn_tx_chimney_size = sc->hn_tx_chimney_max;
		hn_ifinit_locked(sc);

		NV_LOCK(sc);
		sc->temp_unusable = FALSE;
		NV_UNLOCK(sc);
		break;
	case SIOCSIFFLAGS:
		/* Same exclusive-control handshake as SIOCSIFMTU above. */
		do {
			NV_LOCK(sc);
			if (!sc->temp_unusable) {
				sc->temp_unusable = TRUE;
				retry_cnt = -1;
			}
			NV_UNLOCK(sc);
			if (retry_cnt > 0) {
				retry_cnt--;
				DELAY(5 * 1000);
			}
		} while (retry_cnt > 0);

		if (retry_cnt == 0) {
			error = EINVAL;
			break;
		}

		if (ifp->if_flags & IFF_UP) {
			/*
			 * If only the state of the PROMISC flag changed,
			 * then just use the 'set promisc mode' command
			 * instead of reinitializing the entire NIC. Doing
			 * a full re-init means reloading the firmware and
			 * waiting for it to start up, which may take a
			 * second or two.
			 */
#ifdef notyet
			/* Fixme:  Promiscuous mode? */
			if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
			    ifp->if_flags & IFF_PROMISC &&
			    !(sc->hn_if_flags & IFF_PROMISC)) {
				/* do something here for Hyper-V */
			} else if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
			    !(ifp->if_flags & IFF_PROMISC) &&
			    sc->hn_if_flags & IFF_PROMISC) {
				/* do something here for Hyper-V */
			} else
#endif
				hn_ifinit_locked(sc);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				hn_stop(sc);
			}
		}
		NV_LOCK(sc);
		sc->temp_unusable = FALSE;
		NV_UNLOCK(sc);
		sc->hn_if_flags = ifp->if_flags;
		error = 0;
		break;
	case SIOCSIFCAP:
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TXCSUM;
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				/*
				 * Only enable UDP checksum offloading on
				 * Windows Server 2012R2 or later releases.
				 */
				if (hv_vmbus_protocal_version >=
				    HV_VMBUS_VERSION_WIN8_1) {
					ifp->if_hwassist |=
					    (CSUM_TCP | CSUM_UDP);
				} else {
					ifp->if_hwassist |= CSUM_TCP;
				}
			}
		}

		if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
			}
		}
		if (mask & IFCAP_LRO)
			ifp->if_capenable ^= IFCAP_LRO;

		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			ifp->if_hwassist ^= CSUM_IP_TSO;
		}

		if (mask & IFCAP_TSO6) {
			ifp->if_capenable ^= IFCAP_TSO6;
			ifp->if_hwassist ^= CSUM_IP6_TSO;
		}

		error = 0;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
#ifdef notyet
		/* Fixme:  Multicast mode? */
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			NV_LOCK(sc);
			netvsc_setmulti(sc);
			NV_UNLOCK(sc);
			error = 0;
		}
#endif
		error = EINVAL;
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
		break;
	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}

	return (error);
}

/*
 * Stop the interface: clear RUNNING/OACTIVE, announce link down, and
 * close the RNDIS device.
 */
static void
hn_stop(hn_softc_t *sc)
{
	struct ifnet *ifp;
	int ret;
	struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);

	ifp = sc->hn_ifp;

	if (bootverbose)
		printf(" Closing Device ...\n");

	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
	if_link_state_change(ifp, LINK_STATE_DOWN);
	sc->hn_initdone = 0;

	/* NOTE(review): return value of hv_rf_on_close() is ignored. */
	ret = hv_rf_on_close(device_ctx);
}

/*
 * FreeBSD transmit entry point
 */
static void
hn_start(struct ifnet *ifp)
{
	hn_softc_t *sc;

	sc = ifp->if_softc;
	NV_LOCK(sc);
	/* Bail out if some ioctl path holds exclusive control. */
	if (sc->temp_unusable) {
		NV_UNLOCK(sc);
		return;
	}
	hn_start_locked(ifp);
	NV_UNLOCK(sc);
}

/*
 * Bring the interface up: open the RNDIS device and mark the ifnet
 * RUNNING with the link up.  No-op if already running.
 */
static void
hn_ifinit_locked(hn_softc_t *sc)
{
	struct ifnet *ifp;
	struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
	int ret;

	ifp = sc->hn_ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		return;
	}

	hv_promisc_mode = 1;

	ret = hv_rf_on_open(device_ctx);
	if (ret != 0) {
		return;
	} else {
		sc->hn_initdone = 1;
	}
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	if_link_state_change(ifp, LINK_STATE_UP);
}

/*
 * Interface init entry point; takes exclusive control of the interface
 * via temp_unusable around hn_ifinit_locked().
 */
static void
hn_ifinit(void *xsc)
{
	hn_softc_t *sc = xsc;

	NV_LOCK(sc);
	if (sc->temp_unusable) {
		NV_UNLOCK(sc);
		return;
	}
	sc->temp_unusable = TRUE;
	NV_UNLOCK(sc);

	hn_ifinit_locked(sc);

	NV_LOCK(sc);
	sc->temp_unusable = FALSE;
	NV_UNLOCK(sc);
}

#ifdef LATER
/*
 * Watchdog: reset the interface after a transmit timeout.
 * (Currently compiled out.)
 */
static void
hn_watchdog(struct ifnet *ifp)
{
	hn_softc_t *sc;
	sc = ifp->if_softc;

	printf("hn%d: watchdog timeout -- resetting\n", sc->hn_unit);
	hn_ifinit(sc);  /*???*/
	ifp->if_oerrors++;
}
#endif

#ifdef HN_LRO_HIWAT
/*
 * Sysctl handler for the LRO high watermark; validates the new value
 * against HN_LRO_HIWAT_ISVALID() before applying it.
 */
static int
hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	int hiwat, error;

	hiwat = sc->hn_lro_hiwat;
	error = sysctl_handle_int(oidp, &hiwat, 0, req);
	if (error || req->newptr == NULL)
		return error;

	if (!HN_LRO_HIWAT_ISVALID(sc, hiwat))
		return EINVAL;

	if (sc->hn_lro_hiwat != hiwat)
		hn_set_lro_hiwat(sc, hiwat);
	return 0;
}
#endif	/* HN_LRO_HIWAT */

/*
 * Sysctl handler for the TX chimney (send-buffer copy) threshold;
 * must be in (0, hn_tx_chimney_max].
 */
static int
hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	int chimney_size, error;

	chimney_size = sc->hn_tx_chimney_size;
	error = sysctl_handle_int(oidp, &chimney_size, 0, req);
	if (error || req->newptr == NULL)
		return error;

	if (chimney_size > sc->hn_tx_chimney_max || chimney_size <= 0)
		return EINVAL;

	if (sc->hn_tx_chimney_size != chimney_size)
		sc->hn_tx_chimney_size = chimney_size;
	return 0;
}

/*
 * Sanity-check an IPv4 packet starting at byte offset 'hoff' in mbuf
 * 'm'.  Returns the IP protocol number (e.g. IPPROTO_TCP) if the IP
 * header — and, for TCP/UDP, the transport header — is complete and
 * self-consistent in the first mbuf; otherwise IPPROTO_DONE.  Used to
 * decide whether the host's checksum claim can be trusted / LRO tried.
 */
static int
hn_check_iplen(const struct mbuf *m, int hoff)
{
	const struct ip *ip;
	int len, iphlen, iplen;
	const struct tcphdr *th;
	int thoff;				/* TCP data offset */

	len = hoff + sizeof(struct ip);

	/* The packet must be at least the size of an IP header. */
	if (m->m_pkthdr.len < len)
		return IPPROTO_DONE;

	/* The fixed IP header must reside completely in the first mbuf. */
	if (m->m_len < len)
		return IPPROTO_DONE;

	ip = mtodo(m, hoff);

	/* Bound check the packet's stated IP header length. */
	iphlen = ip->ip_hl << 2;
	if (iphlen < sizeof(struct ip))		/* minimum header length */
		return IPPROTO_DONE;

	/* The full IP header must reside completely in the one mbuf. */
	if (m->m_len < hoff + iphlen)
		return IPPROTO_DONE;

	iplen = ntohs(ip->ip_len);

	/*
	 * Check that the amount of data in the buffers is as
	 * at least much as the IP header would have us expect.
	 */
	if (m->m_pkthdr.len < hoff + iplen)
		return IPPROTO_DONE;

	/*
	 * Ignore IP fragments.
	 */
	if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
		return IPPROTO_DONE;

	/*
	 * The TCP/IP or UDP/IP header must be entirely contained within
	 * the first fragment of a packet.
	 */
	switch (ip->ip_p) {
	case IPPROTO_TCP:
		if (iplen < iphlen + sizeof(struct tcphdr))
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
			return IPPROTO_DONE;
		th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
		thoff = th->th_off << 2;
		if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + thoff)
			return IPPROTO_DONE;
		break;
	case IPPROTO_UDP:
		if (iplen < iphlen + sizeof(struct udphdr))
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
			return IPPROTO_DONE;
		break;
	default:
		if (iplen < iphlen)
			return IPPROTO_DONE;
		break;
	}
	return ip->ip_p;
}

/*
 * bus_dmamap_load() callback: record the single segment's physical
 * address into *arg.  The RNDIS message DMA tag is created with
 * nsegments == 1, hence the KASSERT.
 */
static void
hn_dma_map_paddr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *paddr = arg;

	if (error)
		return;

	KASSERT(nseg == 1, ("too many segments %d!", nseg));
	*paddr = segs->ds_addr;
}

/*
 * Allocate the TX descriptor ring: the descriptor array, the DMA tags
 * for RNDIS messages and packet data, and per-descriptor RNDIS message
 * memory and data DMA maps.  Returns 0 on success or a bus_dma error.
 *
 * NOTE(review): on a mid-loop failure, descriptors already set up in
 * earlier iterations are not torn down here — presumably the caller
 * invokes hn_destroy_tx_ring() on error; confirm.
 */
static int
hn_create_tx_ring(struct hn_softc *sc)
{
	bus_dma_tag_t parent_dtag;
	int error, i;

	sc->hn_txdesc_cnt = HN_TX_DESC_CNT;
	sc->hn_txdesc = malloc(sizeof(struct hn_txdesc) * sc->hn_txdesc_cnt,
	    M_NETVSC, M_WAITOK | M_ZERO);
	SLIST_INIT(&sc->hn_txlist);
	mtx_init(&sc->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);

	parent_dtag = bus_get_dma_tag(sc->hn_dev);

	/* DMA tag for RNDIS messages. */
	error = bus_dma_tag_create(parent_dtag, /* parent */
	    HN_RNDIS_MSG_ALIGN,		/* alignment */
	    HN_RNDIS_MSG_BOUNDARY,	/* boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    HN_RNDIS_MSG_LEN,		/* maxsize */
	    1,				/* nsegments */
	    HN_RNDIS_MSG_LEN,		/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &sc->hn_tx_rndis_dtag);
	if (error) {
		device_printf(sc->hn_dev, "failed to create rndis dmatag\n");
		return error;
	}

	/* DMA tag for data. */
	error = bus_dma_tag_create(parent_dtag, /* parent */
	    1,				/* alignment */
	    HN_TX_DATA_BOUNDARY,	/* boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    HN_TX_DATA_MAXSIZE,		/* maxsize */
	    HN_TX_DATA_SEGCNT_MAX,	/* nsegments */
	    HN_TX_DATA_SEGSIZE,		/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &sc->hn_tx_data_dtag);
	if (error) {
		device_printf(sc->hn_dev, "failed to create data dmatag\n");
		return error;
	}

	for (i = 0; i < sc->hn_txdesc_cnt; ++i) {
		struct hn_txdesc *txd = &sc->hn_txdesc[i];

		txd->sc = sc;

		/*
		 * Allocate and load RNDIS messages.
		 */
		error = bus_dmamem_alloc(sc->hn_tx_rndis_dtag,
		    (void **)&txd->rndis_msg,
		    BUS_DMA_WAITOK | BUS_DMA_COHERENT,
		    &txd->rndis_msg_dmap);
		if (error) {
			device_printf(sc->hn_dev,
			    "failed to allocate rndis_msg, %d\n", i);
			return error;
		}

		error = bus_dmamap_load(sc->hn_tx_rndis_dtag,
		    txd->rndis_msg_dmap,
		    txd->rndis_msg, HN_RNDIS_MSG_LEN,
		    hn_dma_map_paddr, &txd->rndis_msg_paddr,
		    BUS_DMA_NOWAIT);
		if (error) {
			device_printf(sc->hn_dev,
			    "failed to load rndis_msg, %d\n", i);
			bus_dmamem_free(sc->hn_tx_rndis_dtag,
			    txd->rndis_msg, txd->rndis_msg_dmap);
			return error;
		}

		/* DMA map for TX data. */
		error = bus_dmamap_create(sc->hn_tx_data_dtag, 0,
		    &txd->data_dmap);
		if (error) {
			device_printf(sc->hn_dev,
			    "failed to allocate tx data dmamap\n");
			bus_dmamap_unload(sc->hn_tx_rndis_dtag,
			    txd->rndis_msg_dmap);
			bus_dmamem_free(sc->hn_tx_rndis_dtag,
			    txd->rndis_msg, txd->rndis_msg_dmap);
			return error;
		}

		/* All set, put it to list */
		txd->flags |= HN_TXD_FLAG_ONLIST;
		SLIST_INSERT_HEAD(&sc->hn_txlist, txd, link);
	}
	sc->hn_txdesc_avail = sc->hn_txdesc_cnt;

	return 0;
}

/*
 * Tear down the TX descriptor ring: release each descriptor's DMA
 * resources, destroy the DMA tags, and free the descriptor array.
 * All descriptors must be idle (no mbuf, no loaded data map).
 */
static void
hn_destroy_tx_ring(struct hn_softc *sc)
{
	struct hn_txdesc *txd;

	while ((txd = SLIST_FIRST(&sc->hn_txlist)) != NULL) {
		KASSERT(txd->m == NULL, ("still has mbuf installed"));
		KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
		    ("still dma mapped"));
		SLIST_REMOVE_HEAD(&sc->hn_txlist, link);

		bus_dmamap_unload(sc->hn_tx_rndis_dtag,
		    txd->rndis_msg_dmap);
		bus_dmamem_free(sc->hn_tx_rndis_dtag,
		    txd->rndis_msg, txd->rndis_msg_dmap);

		bus_dmamap_destroy(sc->hn_tx_data_dtag, txd->data_dmap);
	}

	if (sc->hn_tx_data_dtag != NULL)
		bus_dma_tag_destroy(sc->hn_tx_data_dtag);
	if (sc->hn_tx_rndis_dtag != NULL)
		bus_dma_tag_destroy(sc->hn_tx_rndis_dtag);
	free(sc->hn_txdesc, M_NETVSC);
	mtx_destroy(&sc->hn_txlist_spin);
}

static device_method_t netvsc_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		netvsc_probe),
	DEVMETHOD(device_attach,	netvsc_attach),
	DEVMETHOD(device_detach,	netvsc_detach),
	DEVMETHOD(device_shutdown,	netvsc_shutdown),

	{ 0, 0 }
};

static driver_t netvsc_driver = {
	NETVSC_DEVNAME,
	netvsc_methods,
	sizeof(hn_softc_t)
};

static devclass_t netvsc_devclass;

DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0);
MODULE_VERSION(hn, 1);
MODULE_DEPEND(hn, vmbus, 1, 1, 1);