hv_netvsc_drv_freebsd.c revision 295789
1/*- 2 * Copyright (c) 2010-2012 Citrix Inc. 3 * Copyright (c) 2009-2012 Microsoft Corp. 4 * Copyright (c) 2012 NetApp Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/*- 30 * Copyright (c) 2004-2006 Kip Macy 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 53 */ 54 55#include <sys/cdefs.h> 56__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c 295789 2016-02-19 02:03:14Z sephe $"); 57 58#include "opt_inet6.h" 59#include "opt_inet.h" 60 61#include <sys/param.h> 62#include <sys/systm.h> 63#include <sys/sockio.h> 64#include <sys/mbuf.h> 65#include <sys/malloc.h> 66#include <sys/module.h> 67#include <sys/kernel.h> 68#include <sys/socket.h> 69#include <sys/queue.h> 70#include <sys/lock.h> 71#include <sys/sx.h> 72#include <sys/sysctl.h> 73 74#include <net/if.h> 75#include <net/if_arp.h> 76#include <net/ethernet.h> 77#include <net/if_dl.h> 78#include <net/if_media.h> 79 80#include <net/bpf.h> 81 82#include <net/if_types.h> 83#include <net/if_vlan_var.h> 84#include <net/if.h> 85 86#include <netinet/in_systm.h> 87#include <netinet/in.h> 88#include <netinet/ip.h> 89#include <netinet/if_ether.h> 90#include <netinet/tcp.h> 91#include <netinet/udp.h> 92#include <netinet/ip6.h> 93 94#include <vm/vm.h> 95#include <vm/vm_param.h> 96#include <vm/vm_kern.h> 97#include <vm/pmap.h> 98 99#include <machine/bus.h> 100#include <machine/resource.h> 101#include <machine/frame.h> 102#include <machine/vmparam.h> 103 104#include <sys/bus.h> 105#include <sys/rman.h> 106#include <sys/mutex.h> 107#include <sys/errno.h> 108#include <sys/types.h> 109#include <machine/atomic.h> 110 111#include <machine/intr_machdep.h> 112 113#include <machine/in_cksum.h> 114 115#include <dev/hyperv/include/hyperv.h> 116#include "hv_net_vsc.h" 117#include "hv_rndis.h" 118#include "hv_rndis_filter.h" 119 120 121/* Short for Hyper-V network interface */ 122#define NETVSC_DEVNAME "hn" 123 124/* 125 * It looks like offset 0 of buf is reserved to hold the softc pointer. 126 * The sc pointer evidently not needed, and is not presently populated. 127 * The packet offset is where the netvsc_packet starts in the buffer. 128 */ 129#define HV_NV_SC_PTR_OFFSET_IN_BUF 0 130#define HV_NV_PACKET_OFFSET_IN_BUF 16 131 132/* 133 * A unified flag for all outbound check sum flags is useful, 134 * and it helps avoiding unnecessary check sum calculation in 135 * network forwarding scenario. 136 */ 137#define HV_CSUM_FOR_OUTBOUND \ 138 (CSUM_IP|CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP|CSUM_IP_TSO| \ 139 CSUM_IP_ISCSI|CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP| \ 140 CSUM_IP6_TSO|CSUM_IP6_ISCSI) 141 142/* XXX move to netinet/tcp_lro.h */ 143#define HN_LRO_HIWAT_MAX 65535 144#define HN_LRO_HIWAT_DEF HN_LRO_HIWAT_MAX 145/* YYY 2*MTU is a bit rough, but should be good enough. */ 146#define HN_LRO_HIWAT_MTULIM(ifp) (2 * (ifp)->if_mtu) 147#define HN_LRO_HIWAT_ISVALID(sc, hiwat) \ 148 ((hiwat) >= HN_LRO_HIWAT_MTULIM((sc)->hn_ifp) || \ 149 (hiwat) <= HN_LRO_HIWAT_MAX) 150 151/* 152 * Be aware that this sleepable mutex will exhibit WITNESS errors when 153 * certain TCP and ARP code paths are taken. This appears to be a 154 * well-known condition, as all other drivers checked use a sleeping 155 * mutex to protect their transmit paths. 156 * Also Be aware that mutexes do not play well with semaphores, and there 157 * is a conflicting semaphore in a certain channel code path. 158 */ 159#define NV_LOCK_INIT(_sc, _name) \ 160 mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF) 161#define NV_LOCK(_sc) mtx_lock(&(_sc)->hn_lock) 162#define NV_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->hn_lock, MA_OWNED) 163#define NV_UNLOCK(_sc) mtx_unlock(&(_sc)->hn_lock) 164#define NV_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->hn_lock) 165 166 167/* 168 * Globals 169 */ 170 171int hv_promisc_mode = 0; /* normal mode by default */ 172 173/* Trust tcp segements verification on host side. */ 174static int hn_trust_hosttcp = 0; 175TUNABLE_INT("dev.hn.trust_hosttcp", &hn_trust_hosttcp); 176 177/* 178 * Forward declarations 179 */ 180static void hn_stop(hn_softc_t *sc); 181static void hn_ifinit_locked(hn_softc_t *sc); 182static void hn_ifinit(void *xsc); 183static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); 184static int hn_start_locked(struct ifnet *ifp); 185static void hn_start(struct ifnet *ifp); 186static int hn_ifmedia_upd(struct ifnet *ifp); 187static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); 188#ifdef HN_LRO_HIWAT 189static int hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS); 190#endif 191static int hn_check_iplen(const struct mbuf *, int); 192 193static __inline void 194hn_set_lro_hiwat(struct hn_softc *sc, int hiwat) 195{ 196 sc->hn_lro_hiwat = hiwat; 197#ifdef HN_LRO_HIWAT 198 sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat; 199#endif 200} 201 202/* 203 * NetVsc get message transport protocol type 204 */ 205static uint32_t get_transport_proto_type(struct mbuf *m_head) 206{ 207 uint32_t ret_val = TRANSPORT_TYPE_NOT_IP; 208 uint16_t ether_type = 0; 209 int ether_len = 0; 210 struct ether_vlan_header *eh; 211#ifdef INET 212 struct ip *iph; 213#endif 214#ifdef INET6 215 struct ip6_hdr *ip6; 216#endif 217 218 eh = mtod(m_head, struct ether_vlan_header*); 219 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 220 ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 221 ether_type = eh->evl_proto; 222 } else { 223 ether_len = ETHER_HDR_LEN; 224 ether_type = eh->evl_encap_proto; 225 } 226 227 switch (ntohs(ether_type)) { 228#ifdef INET6 229 case ETHERTYPE_IPV6: 230 ip6 = (struct ip6_hdr *)(m_head->m_data + ether_len); 231 232 if (IPPROTO_TCP == ip6->ip6_nxt) { 233 ret_val = TRANSPORT_TYPE_IPV6_TCP; 234 } else if (IPPROTO_UDP == ip6->ip6_nxt) { 235 ret_val = TRANSPORT_TYPE_IPV6_UDP; 236 } 237 break; 238#endif 239#ifdef INET 240 case ETHERTYPE_IP: 241 iph = (struct ip *)(m_head->m_data + ether_len); 242 243 if (IPPROTO_TCP == iph->ip_p) { 244 ret_val = TRANSPORT_TYPE_IPV4_TCP; 245 } else if (IPPROTO_UDP == iph->ip_p) { 246 ret_val = TRANSPORT_TYPE_IPV4_UDP; 247 } 248 break; 249#endif 250 default: 251 ret_val = TRANSPORT_TYPE_NOT_IP; 252 break; 253 } 254 255 return (ret_val); 256} 257 258static int 259hn_ifmedia_upd(struct ifnet *ifp __unused) 260{ 261 262 return EOPNOTSUPP; 263} 264 265static void 266hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 267{ 268 struct hn_softc *sc = ifp->if_softc; 269 270 ifmr->ifm_status = IFM_AVALID; 271 ifmr->ifm_active = IFM_ETHER; 272 273 if (!sc->hn_carrier) { 274 ifmr->ifm_active |= IFM_NONE; 275 return; 276 } 277 ifmr->ifm_status |= IFM_ACTIVE; 278 ifmr->ifm_active |= IFM_10G_T | IFM_FDX; 279} 280 281/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ 282static const hv_guid g_net_vsc_device_type = { 283 .data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 284 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} 285}; 286 287/* 288 * Standard probe entry point. 289 * 290 */ 291static int 292netvsc_probe(device_t dev) 293{ 294 const char *p; 295 296 p = vmbus_get_type(dev); 297 if (!memcmp(p, &g_net_vsc_device_type.data, sizeof(hv_guid))) { 298 device_set_desc(dev, "Synthetic Network Interface"); 299 if (bootverbose) 300 printf("Netvsc probe... DONE \n"); 301 302 return (BUS_PROBE_DEFAULT); 303 } 304 305 return (ENXIO); 306} 307 308/* 309 * Standard attach entry point. 310 * 311 * Called when the driver is loaded. It allocates needed resources, 312 * and initializes the "hardware" and software. 313 */ 314static int 315netvsc_attach(device_t dev) 316{ 317 struct hv_device *device_ctx = vmbus_get_devctx(dev); 318 netvsc_device_info device_info; 319 hn_softc_t *sc; 320 int unit = device_get_unit(dev); 321 struct ifnet *ifp; 322 struct sysctl_oid_list *child; 323 struct sysctl_ctx_list *ctx; 324 int ret; 325 326 sc = device_get_softc(dev); 327 if (sc == NULL) { 328 return (ENOMEM); 329 } 330 331 bzero(sc, sizeof(hn_softc_t)); 332 sc->hn_unit = unit; 333 sc->hn_dev = dev; 334 sc->hn_lro_hiwat = HN_LRO_HIWAT_DEF; 335 sc->hn_trust_hosttcp = hn_trust_hosttcp; 336 337 NV_LOCK_INIT(sc, "NetVSCLock"); 338 339 sc->hn_dev_obj = device_ctx; 340 341 ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER); 342 ifp->if_softc = sc; 343 344 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 345 ifp->if_dunit = unit; 346 ifp->if_dname = NETVSC_DEVNAME; 347 348 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 349 ifp->if_ioctl = hn_ioctl; 350 ifp->if_start = hn_start; 351 ifp->if_init = hn_ifinit; 352 /* needed by hv_rf_on_device_add() code */ 353 ifp->if_mtu = ETHERMTU; 354 IFQ_SET_MAXLEN(&ifp->if_snd, 512); 355 ifp->if_snd.ifq_drv_maxlen = 511; 356 IFQ_SET_READY(&ifp->if_snd); 357 358 ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); 359 ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); 360 ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); 361 /* XXX ifmedia_set really should do this for us */ 362 sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; 363 364 /* 365 * Tell upper layers that we support full VLAN capability. 366 */ 367 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); 368 ifp->if_capabilities |= 369 IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | 370 IFCAP_LRO; 371 ifp->if_capenable |= 372 IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | 373 IFCAP_LRO; 374 /* 375 * Only enable UDP checksum offloading when it is on 2012R2 or 376 * later. UDP checksum offloading doesn't work on earlier 377 * Windows releases. 378 */ 379 if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1) 380 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 381 else 382 ifp->if_hwassist = CSUM_TCP | CSUM_TSO; 383 384 ret = hv_rf_on_device_add(device_ctx, &device_info); 385 if (ret != 0) { 386 if_free(ifp); 387 388 return (ret); 389 } 390 if (device_info.link_state == 0) { 391 sc->hn_carrier = 1; 392 } 393 394#if defined(INET) || defined(INET6) 395 tcp_lro_init(&sc->hn_lro); 396 /* Driver private LRO settings */ 397 sc->hn_lro.ifp = ifp; 398#ifdef HN_LRO_HIWAT 399 sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat; 400#endif 401#endif /* INET || INET6 */ 402 403 ether_ifattach(ifp, device_info.mac_addr); 404 405 ctx = device_get_sysctl_ctx(dev); 406 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 407 408 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_queued", 409 CTLFLAG_RW, &sc->hn_lro.lro_queued, 0, "LRO queued"); 410 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_flushed", 411 CTLFLAG_RW, &sc->hn_lro.lro_flushed, 0, "LRO flushed"); 412 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "lro_tried", 413 CTLFLAG_RW, &sc->hn_lro_tried, "# of LRO tries"); 414#ifdef HN_LRO_HIWAT 415 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_hiwat", 416 CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_hiwat_sysctl, 417 "I", "LRO high watermark"); 418#endif 419 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "trust_hosttcp", 420 CTLFLAG_RW, &sc->hn_trust_hosttcp, 0, 421 "Trust tcp segement verification on host side, " 422 "when csum info is missing"); 423 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_ip", 424 CTLFLAG_RW, &sc->hn_csum_ip, "RXCSUM IP"); 425 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_tcp", 426 CTLFLAG_RW, &sc->hn_csum_tcp, "RXCSUM TCP"); 427 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_trusted", 428 CTLFLAG_RW, &sc->hn_csum_trusted, 429 "# of TCP segements that we trust host's csum verification"); 430 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "small_pkts", 431 CTLFLAG_RW, &sc->hn_small_pkts, "# of small packets received"); 432 433 if (unit == 0) { 434 struct sysctl_ctx_list *dc_ctx; 435 struct sysctl_oid_list *dc_child; 436 devclass_t dc; 437 438 /* 439 * Add sysctl nodes for devclass 440 */ 441 dc = device_get_devclass(dev); 442 dc_ctx = devclass_get_sysctl_ctx(dc); 443 dc_child = SYSCTL_CHILDREN(devclass_get_sysctl_tree(dc)); 444 445 SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "trust_hosttcp", 446 CTLFLAG_RD, &hn_trust_hosttcp, 0, 447 "Trust tcp segement verification on host side, " 448 "when csum info is missing (global setting)"); 449 } 450 451 return (0); 452} 453 454/* 455 * Standard detach entry point 456 */ 457static int 458netvsc_detach(device_t dev) 459{ 460 struct hn_softc *sc = device_get_softc(dev); 461 struct hv_device *hv_device = vmbus_get_devctx(dev); 462 463 if (bootverbose) 464 printf("netvsc_detach\n"); 465 466 /* 467 * XXXKYS: Need to clean up all our 468 * driver state; this is the driver 469 * unloading. 470 */ 471 472 /* 473 * XXXKYS: Need to stop outgoing traffic and unregister 474 * the netdevice. 475 */ 476 477 hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL); 478 479 ifmedia_removeall(&sc->hn_media); 480#if defined(INET) || defined(INET6) 481 tcp_lro_free(&sc->hn_lro); 482#endif 483 484 return (0); 485} 486 487/* 488 * Standard shutdown entry point 489 */ 490static int 491netvsc_shutdown(device_t dev) 492{ 493 return (0); 494} 495 496/* 497 * Send completion processing 498 * 499 * Note: It looks like offset 0 of buf is reserved to hold the softc 500 * pointer. The sc pointer is not currently needed in this function, and 501 * it is not presently populated by the TX function. 502 */ 503void 504netvsc_xmit_completion(void *context) 505{ 506 netvsc_packet *packet = (netvsc_packet *)context; 507 struct mbuf *mb; 508 uint8_t *buf; 509 510 mb = (struct mbuf *)(uintptr_t)packet->compl.send.send_completion_tid; 511 buf = ((uint8_t *)packet) - HV_NV_PACKET_OFFSET_IN_BUF; 512 513 free(buf, M_NETVSC); 514 515 if (mb != NULL) { 516 m_freem(mb); 517 } 518} 519 520/* 521 * Start a transmit of one or more packets 522 */ 523static int 524hn_start_locked(struct ifnet *ifp) 525{ 526 hn_softc_t *sc = ifp->if_softc; 527 struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); 528 netvsc_dev *net_dev = sc->net_dev; 529 device_t dev = device_ctx->device; 530 uint8_t *buf; 531 netvsc_packet *packet; 532 struct mbuf *m_head, *m; 533 struct mbuf *mc_head = NULL; 534 struct ether_vlan_header *eh; 535 rndis_msg *rndis_mesg; 536 rndis_packet *rndis_pkt; 537 rndis_per_packet_info *rppi; 538 ndis_8021q_info *rppi_vlan_info; 539 rndis_tcp_ip_csum_info *csum_info; 540 rndis_tcp_tso_info *tso_info; 541 int ether_len; 542 int i; 543 int num_frags; 544 int len; 545 int retries = 0; 546 int ret = 0; 547 uint32_t rndis_msg_size = 0; 548 uint32_t trans_proto_type; 549 uint32_t send_buf_section_idx = 550 NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; 551 552 while (!IFQ_DRV_IS_EMPTY(&sc->hn_ifp->if_snd)) { 553 IFQ_DRV_DEQUEUE(&sc->hn_ifp->if_snd, m_head); 554 if (m_head == NULL) { 555 break; 556 } 557 558 len = 0; 559 num_frags = 0; 560 561 /* Walk the mbuf list computing total length and num frags */ 562 for (m = m_head; m != NULL; m = m->m_next) { 563 if (m->m_len != 0) { 564 num_frags++; 565 len += m->m_len; 566 } 567 } 568 569 /* 570 * Reserve the number of pages requested. Currently, 571 * one page is reserved for the message in the RNDIS 572 * filter packet 573 */ 574 num_frags += HV_RF_NUM_TX_RESERVED_PAGE_BUFS; 575 576 /* If exceeds # page_buffers in netvsc_packet */ 577 if (num_frags > NETVSC_PACKET_MAXPAGE) { 578 device_printf(dev, "exceed max page buffers,%d,%d\n", 579 num_frags, NETVSC_PACKET_MAXPAGE); 580 m_freem(m_head); 581 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 582 return (EINVAL); 583 } 584 585 /* 586 * Allocate a buffer with space for a netvsc packet plus a 587 * number of reserved areas. First comes a (currently 16 588 * bytes, currently unused) reserved data area. Second is 589 * the netvsc_packet. Third is an area reserved for an 590 * rndis_filter_packet struct. Fourth (optional) is a 591 * rndis_per_packet_info struct. 592 * Changed malloc to M_NOWAIT to avoid sleep under spin lock. 593 * No longer reserving extra space for page buffers, as they 594 * are already part of the netvsc_packet. 595 */ 596 buf = malloc(HV_NV_PACKET_OFFSET_IN_BUF + 597 sizeof(netvsc_packet) + 598 sizeof(rndis_msg) + 599 RNDIS_VLAN_PPI_SIZE + 600 RNDIS_TSO_PPI_SIZE + 601 RNDIS_CSUM_PPI_SIZE, 602 M_NETVSC, M_ZERO | M_NOWAIT); 603 if (buf == NULL) { 604 device_printf(dev, "hn:malloc packet failed\n"); 605 m_freem(m_head); 606 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 607 return (ENOMEM); 608 } 609 610 packet = (netvsc_packet *)(buf + HV_NV_PACKET_OFFSET_IN_BUF); 611 *(vm_offset_t *)buf = HV_NV_SC_PTR_OFFSET_IN_BUF; 612 613 packet->is_data_pkt = TRUE; 614 615 /* Set up the rndis header */ 616 packet->page_buf_count = num_frags; 617 618 /* Initialize it from the mbuf */ 619 packet->tot_data_buf_len = len; 620 621 /* 622 * extension points to the area reserved for the 623 * rndis_filter_packet, which is placed just after 624 * the netvsc_packet (and rppi struct, if present; 625 * length is updated later). 626 */ 627 packet->rndis_mesg = packet + 1; 628 rndis_mesg = (rndis_msg *)packet->rndis_mesg; 629 rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG; 630 631 rndis_pkt = &rndis_mesg->msg.packet; 632 rndis_pkt->data_offset = sizeof(rndis_packet); 633 rndis_pkt->data_length = packet->tot_data_buf_len; 634 rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet); 635 636 rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet); 637 638 /* 639 * If the Hyper-V infrastructure needs to embed a VLAN tag, 640 * initialize netvsc_packet and rppi struct values as needed. 641 */ 642 if (m_head->m_flags & M_VLANTAG) { 643 /* 644 * set up some additional fields so the Hyper-V infrastructure will stuff the VLAN tag 645 * into the frame. 646 */ 647 packet->vlan_tci = m_head->m_pkthdr.ether_vtag; 648 649 rndis_msg_size += RNDIS_VLAN_PPI_SIZE; 650 651 rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE, 652 ieee_8021q_info); 653 654 /* VLAN info immediately follows rppi struct */ 655 rppi_vlan_info = (ndis_8021q_info *)((char*)rppi + 656 rppi->per_packet_info_offset); 657 /* FreeBSD does not support CFI or priority */ 658 rppi_vlan_info->u1.s1.vlan_id = 659 packet->vlan_tci & 0xfff; 660 } 661 662 /* Only check the flags for outbound and ignore the ones for inbound */ 663 if (0 == (m_head->m_pkthdr.csum_flags & HV_CSUM_FOR_OUTBOUND)) { 664 goto pre_send; 665 } 666 667 eh = mtod(m_head, struct ether_vlan_header*); 668 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 669 ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 670 } else { 671 ether_len = ETHER_HDR_LEN; 672 } 673 674 trans_proto_type = get_transport_proto_type(m_head); 675 if (TRANSPORT_TYPE_NOT_IP == trans_proto_type) { 676 goto pre_send; 677 } 678 679 /* 680 * TSO packet needless to setup the send side checksum 681 * offload. 682 */ 683 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 684 goto do_tso; 685 } 686 687 /* setup checksum offload */ 688 rndis_msg_size += RNDIS_CSUM_PPI_SIZE; 689 rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE, 690 tcpip_chksum_info); 691 csum_info = (rndis_tcp_ip_csum_info *)((char*)rppi + 692 rppi->per_packet_info_offset); 693 694 if (trans_proto_type & (TYPE_IPV4 << 16)) { 695 csum_info->xmit.is_ipv4 = 1; 696 } else { 697 csum_info->xmit.is_ipv6 = 1; 698 } 699 700 if (trans_proto_type & TYPE_TCP) { 701 csum_info->xmit.tcp_csum = 1; 702 csum_info->xmit.tcp_header_offset = 0; 703 } else if (trans_proto_type & TYPE_UDP) { 704 csum_info->xmit.udp_csum = 1; 705 } 706 707 goto pre_send; 708 709do_tso: 710 /* setup TCP segmentation offload */ 711 rndis_msg_size += RNDIS_TSO_PPI_SIZE; 712 rppi = hv_set_rppi_data(rndis_mesg, RNDIS_TSO_PPI_SIZE, 713 tcp_large_send_info); 714 715 tso_info = (rndis_tcp_tso_info *)((char *)rppi + 716 rppi->per_packet_info_offset); 717 tso_info->lso_v2_xmit.type = 718 RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; 719 720#ifdef INET 721 if (trans_proto_type & (TYPE_IPV4 << 16)) { 722 struct ip *ip = 723 (struct ip *)(m_head->m_data + ether_len); 724 unsigned long iph_len = ip->ip_hl << 2; 725 struct tcphdr *th = 726 (struct tcphdr *)((caddr_t)ip + iph_len); 727 728 tso_info->lso_v2_xmit.ip_version = 729 RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; 730 ip->ip_len = 0; 731 ip->ip_sum = 0; 732 733 th->th_sum = in_pseudo(ip->ip_src.s_addr, 734 ip->ip_dst.s_addr, 735 htons(IPPROTO_TCP)); 736 } 737#endif 738#if defined(INET6) && defined(INET) 739 else 740#endif 741#ifdef INET6 742 { 743 struct ip6_hdr *ip6 = 744 (struct ip6_hdr *)(m_head->m_data + ether_len); 745 struct tcphdr *th = (struct tcphdr *)(ip6 + 1); 746 747 tso_info->lso_v2_xmit.ip_version = 748 RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; 749 ip6->ip6_plen = 0; 750 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 751 } 752#endif 753 tso_info->lso_v2_xmit.tcp_header_offset = 0; 754 tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz; 755 756pre_send: 757 rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size; 758 packet->tot_data_buf_len = rndis_mesg->msg_len; 759 760 /* send packet with send buffer */ 761 if (packet->tot_data_buf_len < net_dev->send_section_size) { 762 send_buf_section_idx = 763 hv_nv_get_next_send_section(net_dev); 764 if (send_buf_section_idx != 765 NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { 766 char *dest = ((char *)net_dev->send_buf + 767 send_buf_section_idx * 768 net_dev->send_section_size); 769 770 memcpy(dest, rndis_mesg, rndis_msg_size); 771 dest += rndis_msg_size; 772 for (m = m_head; m != NULL; m = m->m_next) { 773 if (m->m_len) { 774 memcpy(dest, 775 (void *)mtod(m, vm_offset_t), 776 m->m_len); 777 dest += m->m_len; 778 } 779 } 780 781 packet->send_buf_section_idx = 782 send_buf_section_idx; 783 packet->send_buf_section_size = 784 packet->tot_data_buf_len; 785 packet->page_buf_count = 0; 786 goto do_send; 787 } 788 } 789 790 /* send packet with page buffer */ 791 packet->page_buffers[0].pfn = 792 atop(hv_get_phys_addr(rndis_mesg)); 793 packet->page_buffers[0].offset = 794 (unsigned long)rndis_mesg & PAGE_MASK; 795 packet->page_buffers[0].length = rndis_msg_size; 796 797 /* 798 * Fill the page buffers with mbuf info starting at index 799 * HV_RF_NUM_TX_RESERVED_PAGE_BUFS. 800 */ 801 i = HV_RF_NUM_TX_RESERVED_PAGE_BUFS; 802 for (m = m_head; m != NULL; m = m->m_next) { 803 if (m->m_len) { 804 vm_offset_t paddr = 805 vtophys(mtod(m, vm_offset_t)); 806 packet->page_buffers[i].pfn = 807 paddr >> PAGE_SHIFT; 808 packet->page_buffers[i].offset = 809 paddr & (PAGE_SIZE - 1); 810 packet->page_buffers[i].length = m->m_len; 811 i++; 812 } 813 } 814 815 packet->send_buf_section_idx = 816 NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; 817 packet->send_buf_section_size = 0; 818 819do_send: 820 821 /* 822 * If bpf, copy the mbuf chain. This is less expensive than 823 * it appears; the mbuf clusters are not copied, only their 824 * reference counts are incremented. 825 * Needed to avoid a race condition where the completion 826 * callback is invoked, freeing the mbuf chain, before the 827 * bpf_mtap code has a chance to run. 828 */ 829 if (ifp->if_bpf) { 830 mc_head = m_copypacket(m_head, M_DONTWAIT); 831 } 832retry_send: 833 /* Set the completion routine */ 834 packet->compl.send.on_send_completion = netvsc_xmit_completion; 835 packet->compl.send.send_completion_context = packet; 836 packet->compl.send.send_completion_tid = (uint64_t)(uintptr_t)m_head; 837 838 /* Removed critical_enter(), does not appear necessary */ 839 ret = hv_nv_on_send(device_ctx, packet); 840 if (ret == 0) { 841 ifp->if_opackets++; 842 /* if bpf && mc_head, call bpf_mtap code */ 843 if (mc_head) { 844 ETHER_BPF_MTAP(ifp, mc_head); 845 } 846 } else { 847 retries++; 848 if (retries < 4) { 849 goto retry_send; 850 } 851 852 IF_PREPEND(&ifp->if_snd, m_head); 853 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 854 855 /* 856 * Null the mbuf pointer so the completion function 857 * does not free the mbuf chain. We just pushed the 858 * mbuf chain back on the if_snd queue. 859 */ 860 packet->compl.send.send_completion_tid = 0; 861 862 /* 863 * Release the resources since we will not get any 864 * send completion 865 */ 866 netvsc_xmit_completion(packet); 867 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 868 } 869 870 /* if bpf && mc_head, free the mbuf chain copy */ 871 if (mc_head) { 872 m_freem(mc_head); 873 } 874 } 875 876 return (ret); 877} 878 879/* 880 * Link up/down notification 881 */ 882void 883netvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status) 884{ 885 hn_softc_t *sc = device_get_softc(device_obj->device); 886 887 if (sc == NULL) { 888 return; 889 } 890 891 if (status == 1) { 892 sc->hn_carrier = 1; 893 } else { 894 sc->hn_carrier = 0; 895 } 896} 897 898/* 899 * Append the specified data to the indicated mbuf chain, 900 * Extend the mbuf chain if the new data does not fit in 901 * existing space. 902 * 903 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. 904 * There should be an equivalent in the kernel mbuf code, 905 * but there does not appear to be one yet. 906 * 907 * Differs from m_append() in that additional mbufs are 908 * allocated with cluster size MJUMPAGESIZE, and filled 909 * accordingly. 910 * 911 * Return 1 if able to complete the job; otherwise 0. 912 */ 913static int 914hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) 915{ 916 struct mbuf *m, *n; 917 int remainder, space; 918 919 for (m = m0; m->m_next != NULL; m = m->m_next) 920 ; 921 remainder = len; 922 space = M_TRAILINGSPACE(m); 923 if (space > 0) { 924 /* 925 * Copy into available space. 926 */ 927 if (space > remainder) 928 space = remainder; 929 bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 930 m->m_len += space; 931 cp += space; 932 remainder -= space; 933 } 934 while (remainder > 0) { 935 /* 936 * Allocate a new mbuf; could check space 937 * and allocate a cluster instead. 938 */ 939 n = m_getjcl(M_DONTWAIT, m->m_type, 0, MJUMPAGESIZE); 940 if (n == NULL) 941 break; 942 n->m_len = min(MJUMPAGESIZE, remainder); 943 bcopy(cp, mtod(n, caddr_t), n->m_len); 944 cp += n->m_len; 945 remainder -= n->m_len; 946 m->m_next = n; 947 m = n; 948 } 949 if (m0->m_flags & M_PKTHDR) 950 m0->m_pkthdr.len += len - remainder; 951 952 return (remainder == 0); 953} 954 955 956/* 957 * Called when we receive a data packet from the "wire" on the 958 * specified device 959 * 960 * Note: This is no longer used as a callback 961 */ 962int 963netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, 964 rndis_tcp_ip_csum_info *csum_info) 965{ 966 hn_softc_t *sc = (hn_softc_t *)device_get_softc(device_ctx->device); 967 struct mbuf *m_new; 968 struct ifnet *ifp; 969 device_t dev = device_ctx->device; 970 int size, do_lro = 0; 971 972 if (sc == NULL) { 973 return (0); /* TODO: KYS how can this be! */ 974 } 975 976 ifp = sc->hn_ifp; 977 978 ifp = sc->arpcom.ac_ifp; 979 980 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 981 return (0); 982 } 983 984 /* 985 * Bail out if packet contains more data than configured MTU. 986 */ 987 if (packet->tot_data_buf_len > (ifp->if_mtu + ETHER_HDR_LEN)) { 988 return (0); 989 } else if (packet->tot_data_buf_len <= MHLEN) { 990 m_new = m_gethdr(M_NOWAIT, MT_DATA); 991 if (m_new == NULL) 992 return (0); 993 memcpy(mtod(m_new, void *), packet->data, 994 packet->tot_data_buf_len); 995 m_new->m_pkthdr.len = m_new->m_len = packet->tot_data_buf_len; 996 sc->hn_small_pkts++; 997 } else { 998 /* 999 * Get an mbuf with a cluster. For packets 2K or less, 1000 * get a standard 2K cluster. For anything larger, get a 1001 * 4K cluster. Any buffers larger than 4K can cause problems 1002 * if looped around to the Hyper-V TX channel, so avoid them. 1003 */ 1004 size = MCLBYTES; 1005 if (packet->tot_data_buf_len > MCLBYTES) { 1006 /* 4096 */ 1007 size = MJUMPAGESIZE; 1008 } 1009 1010 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); 1011 if (m_new == NULL) { 1012 device_printf(dev, "alloc mbuf failed.\n"); 1013 return (0); 1014 } 1015 1016 hv_m_append(m_new, packet->tot_data_buf_len, packet->data); 1017 } 1018 m_new->m_pkthdr.rcvif = ifp; 1019 1020 /* receive side checksum offload */ 1021 if (NULL != csum_info) { 1022 /* IP csum offload */ 1023 if (csum_info->receive.ip_csum_succeeded) { 1024 m_new->m_pkthdr.csum_flags |= 1025 (CSUM_IP_CHECKED | CSUM_IP_VALID); 1026 sc->hn_csum_ip++; 1027 } 1028 1029 /* TCP csum offload */ 1030 if (csum_info->receive.tcp_csum_succeeded) { 1031 m_new->m_pkthdr.csum_flags |= 1032 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1033 m_new->m_pkthdr.csum_data = 0xffff; 1034 sc->hn_csum_tcp++; 1035 } 1036 1037 if (csum_info->receive.ip_csum_succeeded && 1038 csum_info->receive.tcp_csum_succeeded) 1039 do_lro = 1; 1040 } else { 1041 const struct ether_header *eh; 1042 uint16_t etype; 1043 int hoff; 1044 1045 hoff = sizeof(*eh); 1046 if (m_new->m_len < hoff) 1047 goto skip; 1048 eh = mtod(m_new, struct ether_header *); 1049 etype = ntohs(eh->ether_type); 1050 if (etype == ETHERTYPE_VLAN) { 1051 const struct ether_vlan_header *evl; 1052 1053 hoff = sizeof(*evl); 1054 if (m_new->m_len < hoff) 1055 goto skip; 1056 evl = mtod(m_new, struct ether_vlan_header *); 1057 etype = ntohs(evl->evl_proto); 1058 } 1059 1060 if (etype == ETHERTYPE_IP) { 1061 int pr; 1062 1063 pr = hn_check_iplen(m_new, hoff); 1064 if (pr == IPPROTO_TCP) { 1065 if (sc->hn_trust_hosttcp) { 1066 sc->hn_csum_trusted++; 1067 m_new->m_pkthdr.csum_flags |= 1068 (CSUM_IP_CHECKED | CSUM_IP_VALID | 1069 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1070 m_new->m_pkthdr.csum_data = 0xffff; 1071 } 1072 /* Rely on SW csum verification though... */ 1073 do_lro = 1; 1074 } 1075 } 1076 } 1077skip: 1078 if ((packet->vlan_tci != 0) && 1079 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { 1080 m_new->m_pkthdr.ether_vtag = packet->vlan_tci; 1081 m_new->m_flags |= M_VLANTAG; 1082 } 1083 1084 /* 1085 * Note: Moved RX completion back to hv_nv_on_receive() so all 1086 * messages (not just data messages) will trigger a response. 1087 */ 1088 1089 ifp->if_ipackets++; 1090 1091 if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { 1092#if defined(INET) || defined(INET6) 1093 struct lro_ctrl *lro = &sc->hn_lro; 1094 1095 if (lro->lro_cnt) { 1096 sc->hn_lro_tried++; 1097 if (tcp_lro_rx(lro, m_new, 0) == 0) { 1098 /* DONE! */ 1099 return 0; 1100 } 1101 } 1102#endif 1103 } 1104 1105 /* We're not holding the lock here, so don't release it */ 1106 (*ifp->if_input)(ifp, m_new); 1107 1108 return (0); 1109} 1110 1111void 1112netvsc_recv_rollup(struct hv_device *device_ctx) 1113{ 1114#if defined(INET) || defined(INET6) 1115 hn_softc_t *sc = device_get_softc(device_ctx->device); 1116 struct lro_ctrl *lro = &sc->hn_lro; 1117 struct lro_entry *queued; 1118 1119 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { 1120 SLIST_REMOVE_HEAD(&lro->lro_active, next); 1121 tcp_lro_flush(lro, queued); 1122 } 1123#endif 1124} 1125 1126/* 1127 * Rules for using sc->temp_unusable: 1128 * 1. sc->temp_unusable can only be read or written while holding NV_LOCK() 1129 * 2. code reading sc->temp_unusable under NV_LOCK(), and finding 1130 * sc->temp_unusable set, must release NV_LOCK() and exit 1131 * 3. to retain exclusive control of the interface, 1132 * sc->temp_unusable must be set by code before releasing NV_LOCK() 1133 * 4. only code setting sc->temp_unusable can clear sc->temp_unusable 1134 * 5. code setting sc->temp_unusable must eventually clear sc->temp_unusable 1135 */ 1136 1137/* 1138 * Standard ioctl entry point. Called when the user wants to configure 1139 * the interface. 1140 */ 1141static int 1142hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1143{ 1144 hn_softc_t *sc = ifp->if_softc; 1145 struct ifreq *ifr = (struct ifreq *)data; 1146#ifdef INET 1147 struct ifaddr *ifa = (struct ifaddr *)data; 1148#endif 1149 netvsc_device_info device_info; 1150 struct hv_device *hn_dev; 1151 int mask, error = 0; 1152 int retry_cnt = 500; 1153 1154 switch(cmd) { 1155 1156 case SIOCSIFADDR: 1157#ifdef INET 1158 if (ifa->ifa_addr->sa_family == AF_INET) { 1159 ifp->if_flags |= IFF_UP; 1160 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 1161 hn_ifinit(sc); 1162 arp_ifinit(ifp, ifa); 1163 } else 1164#endif 1165 error = ether_ioctl(ifp, cmd, data); 1166 break; 1167 case SIOCSIFMTU: 1168 hn_dev = vmbus_get_devctx(sc->hn_dev); 1169 1170 /* Check MTU value change */ 1171 if (ifp->if_mtu == ifr->ifr_mtu) 1172 break; 1173 1174 if (ifr->ifr_mtu > NETVSC_MAX_CONFIGURABLE_MTU) { 1175 error = EINVAL; 1176 break; 1177 } 1178 1179 /* Obtain and record requested MTU */ 1180 ifp->if_mtu = ifr->ifr_mtu; 1181 /* 1182 * Make sure that LRO high watermark is still valid, 1183 * after MTU change (the 2*MTU limit). 1184 */ 1185 if (!HN_LRO_HIWAT_ISVALID(sc, sc->hn_lro_hiwat)) 1186 hn_set_lro_hiwat(sc, HN_LRO_HIWAT_MTULIM(ifp)); 1187 1188 do { 1189 NV_LOCK(sc); 1190 if (!sc->temp_unusable) { 1191 sc->temp_unusable = TRUE; 1192 retry_cnt = -1; 1193 } 1194 NV_UNLOCK(sc); 1195 if (retry_cnt > 0) { 1196 retry_cnt--; 1197 DELAY(5 * 1000); 1198 } 1199 } while (retry_cnt > 0); 1200 1201 if (retry_cnt == 0) { 1202 error = EINVAL; 1203 break; 1204 } 1205 1206 /* We must remove and add back the device to cause the new 1207 * MTU to take effect. This includes tearing down, but not 1208 * deleting the channel, then bringing it back up. 1209 */ 1210 error = hv_rf_on_device_remove(hn_dev, HV_RF_NV_RETAIN_CHANNEL); 1211 if (error) { 1212 NV_LOCK(sc); 1213 sc->temp_unusable = FALSE; 1214 NV_UNLOCK(sc); 1215 break; 1216 } 1217 error = hv_rf_on_device_add(hn_dev, &device_info); 1218 if (error) { 1219 NV_LOCK(sc); 1220 sc->temp_unusable = FALSE; 1221 NV_UNLOCK(sc); 1222 break; 1223 } 1224 1225 hn_ifinit_locked(sc); 1226 1227 NV_LOCK(sc); 1228 sc->temp_unusable = FALSE; 1229 NV_UNLOCK(sc); 1230 break; 1231 case SIOCSIFFLAGS: 1232 do { 1233 NV_LOCK(sc); 1234 if (!sc->temp_unusable) { 1235 sc->temp_unusable = TRUE; 1236 retry_cnt = -1; 1237 } 1238 NV_UNLOCK(sc); 1239 if (retry_cnt > 0) { 1240 retry_cnt--; 1241 DELAY(5 * 1000); 1242 } 1243 } while (retry_cnt > 0); 1244 1245 if (retry_cnt == 0) { 1246 error = EINVAL; 1247 break; 1248 } 1249 1250 if (ifp->if_flags & IFF_UP) { 1251 /* 1252 * If only the state of the PROMISC flag changed, 1253 * then just use the 'set promisc mode' command 1254 * instead of reinitializing the entire NIC. Doing 1255 * a full re-init means reloading the firmware and 1256 * waiting for it to start up, which may take a 1257 * second or two. 1258 */ 1259#ifdef notyet 1260 /* Fixme: Promiscuous mode? */ 1261 if (ifp->if_drv_flags & IFF_DRV_RUNNING && 1262 ifp->if_flags & IFF_PROMISC && 1263 !(sc->hn_if_flags & IFF_PROMISC)) { 1264 /* do something here for Hyper-V */ 1265 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && 1266 !(ifp->if_flags & IFF_PROMISC) && 1267 sc->hn_if_flags & IFF_PROMISC) { 1268 /* do something here for Hyper-V */ 1269 } else 1270#endif 1271 hn_ifinit_locked(sc); 1272 } else { 1273 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1274 hn_stop(sc); 1275 } 1276 } 1277 NV_LOCK(sc); 1278 sc->temp_unusable = FALSE; 1279 NV_UNLOCK(sc); 1280 sc->hn_if_flags = ifp->if_flags; 1281 error = 0; 1282 break; 1283 case SIOCSIFCAP: 1284 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 1285 if (mask & IFCAP_TXCSUM) { 1286 if (IFCAP_TXCSUM & ifp->if_capenable) { 1287 ifp->if_capenable &= ~IFCAP_TXCSUM; 1288 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 1289 } else { 1290 ifp->if_capenable |= IFCAP_TXCSUM; 1291 /* 1292 * Only enable UDP checksum offloading on 1293 * Windows Server 2012R2 or later releases. 1294 */ 1295 if (hv_vmbus_protocal_version >= 1296 HV_VMBUS_VERSION_WIN8_1) { 1297 ifp->if_hwassist |= 1298 (CSUM_TCP | CSUM_UDP); 1299 } else { 1300 ifp->if_hwassist |= CSUM_TCP; 1301 } 1302 } 1303 } 1304 1305 if (mask & IFCAP_RXCSUM) { 1306 if (IFCAP_RXCSUM & ifp->if_capenable) { 1307 ifp->if_capenable &= ~IFCAP_RXCSUM; 1308 } else { 1309 ifp->if_capenable |= IFCAP_RXCSUM; 1310 } 1311 } 1312 if (mask & IFCAP_LRO) 1313 ifp->if_capenable ^= IFCAP_LRO; 1314 1315 if (mask & IFCAP_TSO4) { 1316 ifp->if_capenable ^= IFCAP_TSO4; 1317 ifp->if_hwassist ^= CSUM_IP_TSO; 1318 } 1319 1320 if (mask & IFCAP_TSO6) { 1321 ifp->if_capenable ^= IFCAP_TSO6; 1322 ifp->if_hwassist ^= CSUM_IP6_TSO; 1323 } 1324 1325 error = 0; 1326 break; 1327 case SIOCADDMULTI: 1328 case SIOCDELMULTI: 1329#ifdef notyet 1330 /* Fixme: Multicast mode? */ 1331 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1332 NV_LOCK(sc); 1333 netvsc_setmulti(sc); 1334 NV_UNLOCK(sc); 1335 error = 0; 1336 } 1337#endif 1338 error = EINVAL; 1339 break; 1340 case SIOCSIFMEDIA: 1341 case SIOCGIFMEDIA: 1342 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); 1343 break; 1344 default: 1345 error = ether_ioctl(ifp, cmd, data); 1346 break; 1347 } 1348 1349 return (error); 1350} 1351 1352/* 1353 * 1354 */ 1355static void 1356hn_stop(hn_softc_t *sc) 1357{ 1358 struct ifnet *ifp; 1359 int ret; 1360 struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); 1361 1362 ifp = sc->hn_ifp; 1363 1364 if (bootverbose) 1365 printf(" Closing Device ...\n"); 1366 1367 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 1368 if_link_state_change(ifp, LINK_STATE_DOWN); 1369 sc->hn_initdone = 0; 1370 1371 ret = hv_rf_on_close(device_ctx); 1372} 1373 1374/* 1375 * FreeBSD transmit entry point 1376 */ 1377static void 1378hn_start(struct ifnet *ifp) 1379{ 1380 hn_softc_t *sc; 1381 1382 sc = ifp->if_softc; 1383 NV_LOCK(sc); 1384 if (sc->temp_unusable) { 1385 NV_UNLOCK(sc); 1386 return; 1387 } 1388 hn_start_locked(ifp); 1389 NV_UNLOCK(sc); 1390} 1391 1392/* 1393 * 1394 */ 1395static void 1396hn_ifinit_locked(hn_softc_t *sc) 1397{ 1398 struct ifnet *ifp; 1399 struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); 1400 int ret; 1401 1402 ifp = sc->hn_ifp; 1403 1404 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1405 return; 1406 } 1407 1408 hv_promisc_mode = 1; 1409 1410 ret = hv_rf_on_open(device_ctx); 1411 if (ret != 0) { 1412 return; 1413 } else { 1414 sc->hn_initdone = 1; 1415 } 1416 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1417 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1418 if_link_state_change(ifp, LINK_STATE_UP); 1419} 1420 1421/* 1422 * 1423 */ 1424static void 1425hn_ifinit(void *xsc) 1426{ 1427 hn_softc_t *sc = xsc; 1428 1429 NV_LOCK(sc); 1430 if (sc->temp_unusable) { 1431 NV_UNLOCK(sc); 1432 return; 1433 } 1434 sc->temp_unusable = TRUE; 1435 NV_UNLOCK(sc); 1436 1437 hn_ifinit_locked(sc); 1438 1439 NV_LOCK(sc); 1440 sc->temp_unusable = FALSE; 1441 NV_UNLOCK(sc); 1442} 1443 1444#ifdef LATER 1445/* 1446 * 1447 */ 1448static void 1449hn_watchdog(struct ifnet *ifp) 1450{ 1451 hn_softc_t *sc; 1452 sc = ifp->if_softc; 1453 1454 printf("hn%d: watchdog timeout -- resetting\n", sc->hn_unit); 1455 hn_ifinit(sc); /*???*/ 1456 ifp->if_oerrors++; 1457} 1458#endif 1459 1460#ifdef HN_LRO_HIWAT 1461static int 1462hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS) 1463{ 1464 struct hn_softc *sc = arg1; 1465 int hiwat, error; 1466 1467 hiwat = sc->hn_lro_hiwat; 1468 error = sysctl_handle_int(oidp, &hiwat, 0, req); 1469 if (error || req->newptr == NULL) 1470 return error; 1471 1472 if (!HN_LRO_HIWAT_ISVALID(sc, hiwat)) 1473 return EINVAL; 1474 1475 if (sc->hn_lro_hiwat != hiwat) 1476 hn_set_lro_hiwat(sc, hiwat); 1477 return 0; 1478} 1479#endif /* HN_LRO_HIWAT */ 1480 1481static int 1482hn_check_iplen(const struct mbuf *m, int hoff) 1483{ 1484 const struct ip *ip; 1485 int len, iphlen, iplen; 1486 const struct tcphdr *th; 1487 int thoff; /* TCP data offset */ 1488 1489 len = hoff + sizeof(struct ip); 1490 1491 /* The packet must be at least the size of an IP header. */ 1492 if (m->m_pkthdr.len < len) 1493 return IPPROTO_DONE; 1494 1495 /* The fixed IP header must reside completely in the first mbuf. */ 1496 if (m->m_len < len) 1497 return IPPROTO_DONE; 1498 1499 ip = mtodo(m, hoff); 1500 1501 /* Bound check the packet's stated IP header length. */ 1502 iphlen = ip->ip_hl << 2; 1503 if (iphlen < sizeof(struct ip)) /* minimum header length */ 1504 return IPPROTO_DONE; 1505 1506 /* The full IP header must reside completely in the one mbuf. */ 1507 if (m->m_len < hoff + iphlen) 1508 return IPPROTO_DONE; 1509 1510 iplen = ntohs(ip->ip_len); 1511 1512 /* 1513 * Check that the amount of data in the buffers is as 1514 * at least much as the IP header would have us expect. 1515 */ 1516 if (m->m_pkthdr.len < hoff + iplen) 1517 return IPPROTO_DONE; 1518 1519 /* 1520 * Ignore IP fragments. 1521 */ 1522 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) 1523 return IPPROTO_DONE; 1524 1525 /* 1526 * The TCP/IP or UDP/IP header must be entirely contained within 1527 * the first fragment of a packet. 1528 */ 1529 switch (ip->ip_p) { 1530 case IPPROTO_TCP: 1531 if (iplen < iphlen + sizeof(struct tcphdr)) 1532 return IPPROTO_DONE; 1533 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) 1534 return IPPROTO_DONE; 1535 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); 1536 thoff = th->th_off << 2; 1537 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) 1538 return IPPROTO_DONE; 1539 if (m->m_len < hoff + iphlen + thoff) 1540 return IPPROTO_DONE; 1541 break; 1542 case IPPROTO_UDP: 1543 if (iplen < iphlen + sizeof(struct udphdr)) 1544 return IPPROTO_DONE; 1545 if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) 1546 return IPPROTO_DONE; 1547 break; 1548 default: 1549 if (iplen < iphlen) 1550 return IPPROTO_DONE; 1551 break; 1552 } 1553 return ip->ip_p; 1554} 1555 1556static device_method_t netvsc_methods[] = { 1557 /* Device interface */ 1558 DEVMETHOD(device_probe, netvsc_probe), 1559 DEVMETHOD(device_attach, netvsc_attach), 1560 DEVMETHOD(device_detach, netvsc_detach), 1561 DEVMETHOD(device_shutdown, netvsc_shutdown), 1562 1563 { 0, 0 } 1564}; 1565 1566static driver_t netvsc_driver = { 1567 NETVSC_DEVNAME, 1568 netvsc_methods, 1569 sizeof(hn_softc_t) 1570}; 1571 1572static devclass_t netvsc_devclass; 1573 1574DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0); 1575MODULE_VERSION(hn, 1); 1576MODULE_DEPEND(hn, vmbus, 1, 1, 1); 1577