/* hv_netvsc_drv_freebsd.c revision 292439 */
1/*- 2 * Copyright (c) 2010-2012 Citrix Inc. 3 * Copyright (c) 2009-2012 Microsoft Corp. 4 * Copyright (c) 2012 NetApp Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/*- 30 * Copyright (c) 2004-2006 Kip Macy 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. 
Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 
53 */ 54 55#include <sys/cdefs.h> 56__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c 292439 2015-12-18 14:56:49Z royger $"); 57 58#include "opt_inet6.h" 59#include "opt_inet.h" 60 61#include <sys/param.h> 62#include <sys/systm.h> 63#include <sys/sockio.h> 64#include <sys/mbuf.h> 65#include <sys/malloc.h> 66#include <sys/module.h> 67#include <sys/kernel.h> 68#include <sys/socket.h> 69#include <sys/queue.h> 70#include <sys/lock.h> 71#include <sys/sx.h> 72 73#include <net/if.h> 74#include <net/if_arp.h> 75#include <net/ethernet.h> 76#include <net/if_dl.h> 77#include <net/if_media.h> 78 79#include <net/bpf.h> 80 81#include <net/if_types.h> 82#include <net/if_vlan_var.h> 83#include <net/if.h> 84 85#include <netinet/in_systm.h> 86#include <netinet/in.h> 87#include <netinet/ip.h> 88#include <netinet/if_ether.h> 89#include <netinet/tcp.h> 90#include <netinet/udp.h> 91#include <netinet/ip6.h> 92 93#include <vm/vm.h> 94#include <vm/vm_param.h> 95#include <vm/vm_kern.h> 96#include <vm/pmap.h> 97 98#include <machine/bus.h> 99#include <machine/resource.h> 100#include <machine/frame.h> 101#include <machine/vmparam.h> 102 103#include <sys/bus.h> 104#include <sys/rman.h> 105#include <sys/mutex.h> 106#include <sys/errno.h> 107#include <sys/types.h> 108#include <machine/atomic.h> 109 110#include <machine/intr_machdep.h> 111 112#include <machine/in_cksum.h> 113 114#include <dev/hyperv/include/hyperv.h> 115#include "hv_net_vsc.h" 116#include "hv_rndis.h" 117#include "hv_rndis_filter.h" 118 119 120/* Short for Hyper-V network interface */ 121#define NETVSC_DEVNAME "hn" 122 123/* 124 * It looks like offset 0 of buf is reserved to hold the softc pointer. 125 * The sc pointer evidently not needed, and is not presently populated. 126 * The packet offset is where the netvsc_packet starts in the buffer. 
127 */ 128#define HV_NV_SC_PTR_OFFSET_IN_BUF 0 129#define HV_NV_PACKET_OFFSET_IN_BUF 16 130 131/* 132 * A unified flag for all outbound check sum flags is useful, 133 * and it helps avoiding unnecessary check sum calculation in 134 * network forwarding scenario. 135 */ 136#define HV_CSUM_FOR_OUTBOUND \ 137 (CSUM_IP|CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP|CSUM_IP_TSO| \ 138 CSUM_IP_ISCSI|CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP| \ 139 CSUM_IP6_TSO|CSUM_IP6_ISCSI) 140 141/* 142 * Data types 143 */ 144 145struct hv_netvsc_driver_context { 146 uint32_t drv_inited; 147}; 148 149/* 150 * Be aware that this sleepable mutex will exhibit WITNESS errors when 151 * certain TCP and ARP code paths are taken. This appears to be a 152 * well-known condition, as all other drivers checked use a sleeping 153 * mutex to protect their transmit paths. 154 * Also Be aware that mutexes do not play well with semaphores, and there 155 * is a conflicting semaphore in a certain channel code path. 156 */ 157#define NV_LOCK_INIT(_sc, _name) \ 158 mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF) 159#define NV_LOCK(_sc) mtx_lock(&(_sc)->hn_lock) 160#define NV_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->hn_lock, MA_OWNED) 161#define NV_UNLOCK(_sc) mtx_unlock(&(_sc)->hn_lock) 162#define NV_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->hn_lock) 163 164 165/* 166 * Globals 167 */ 168 169int hv_promisc_mode = 0; /* normal mode by default */ 170 171/* The one and only one */ 172static struct hv_netvsc_driver_context g_netvsc_drv; 173 174 175/* 176 * Forward declarations 177 */ 178static void hn_stop(hn_softc_t *sc); 179static void hn_ifinit_locked(hn_softc_t *sc); 180static void hn_ifinit(void *xsc); 181static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); 182static int hn_start_locked(struct ifnet *ifp); 183static void hn_start(struct ifnet *ifp); 184 185/* 186 * NetVsc get message transport protocol type 187 */ 188static uint32_t get_transport_proto_type(struct mbuf *m_head) 189{ 190 uint32_t 
ret_val = TRANSPORT_TYPE_NOT_IP; 191 uint16_t ether_type = 0; 192 int ether_len = 0; 193 struct ether_vlan_header *eh; 194#ifdef INET 195 struct ip *iph; 196#endif 197#ifdef INET6 198 struct ip6_hdr *ip6; 199#endif 200 201 eh = mtod(m_head, struct ether_vlan_header*); 202 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 203 ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 204 ether_type = eh->evl_proto; 205 } else { 206 ether_len = ETHER_HDR_LEN; 207 ether_type = eh->evl_encap_proto; 208 } 209 210 switch (ntohs(ether_type)) { 211#ifdef INET6 212 case ETHERTYPE_IPV6: 213 ip6 = (struct ip6_hdr *)(m_head->m_data + ether_len); 214 215 if (IPPROTO_TCP == ip6->ip6_nxt) { 216 ret_val = TRANSPORT_TYPE_IPV6_TCP; 217 } else if (IPPROTO_UDP == ip6->ip6_nxt) { 218 ret_val = TRANSPORT_TYPE_IPV6_UDP; 219 } 220 break; 221#endif 222#ifdef INET 223 case ETHERTYPE_IP: 224 iph = (struct ip *)(m_head->m_data + ether_len); 225 226 if (IPPROTO_TCP == iph->ip_p) { 227 ret_val = TRANSPORT_TYPE_IPV4_TCP; 228 } else if (IPPROTO_UDP == iph->ip_p) { 229 ret_val = TRANSPORT_TYPE_IPV4_UDP; 230 } 231 break; 232#endif 233 default: 234 ret_val = TRANSPORT_TYPE_NOT_IP; 235 break; 236 } 237 238 return (ret_val); 239} 240 241/* 242 * NetVsc driver initialization 243 * Note: Filter init is no longer required 244 */ 245static int 246netvsc_drv_init(void) 247{ 248 return (0); 249} 250 251/* 252 * NetVsc global initialization entry point 253 */ 254static void 255netvsc_init(void) 256{ 257 if (bootverbose) 258 printf("Netvsc initializing... 
"); 259 260 /* 261 * XXXKYS: cleanup initialization 262 */ 263 if (!cold && !g_netvsc_drv.drv_inited) { 264 g_netvsc_drv.drv_inited = 1; 265 netvsc_drv_init(); 266 if (bootverbose) 267 printf("done!\n"); 268 } else if (bootverbose) 269 printf("Already initialized!\n"); 270} 271 272/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ 273static const hv_guid g_net_vsc_device_type = { 274 .data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 275 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} 276}; 277 278/* 279 * Standard probe entry point. 280 * 281 */ 282static int 283netvsc_probe(device_t dev) 284{ 285 const char *p; 286 287 p = vmbus_get_type(dev); 288 if (!memcmp(p, &g_net_vsc_device_type.data, sizeof(hv_guid))) { 289 device_set_desc(dev, "Synthetic Network Interface"); 290 if (bootverbose) 291 printf("Netvsc probe... DONE \n"); 292 293 return (BUS_PROBE_DEFAULT); 294 } 295 296 return (ENXIO); 297} 298 299/* 300 * Standard attach entry point. 301 * 302 * Called when the driver is loaded. It allocates needed resources, 303 * and initializes the "hardware" and software. 
304 */ 305static int 306netvsc_attach(device_t dev) 307{ 308 struct hv_device *device_ctx = vmbus_get_devctx(dev); 309 netvsc_device_info device_info; 310 hn_softc_t *sc; 311 int unit = device_get_unit(dev); 312 struct ifnet *ifp; 313 int ret; 314 315 netvsc_init(); 316 317 sc = device_get_softc(dev); 318 if (sc == NULL) { 319 return (ENOMEM); 320 } 321 322 bzero(sc, sizeof(hn_softc_t)); 323 sc->hn_unit = unit; 324 sc->hn_dev = dev; 325 326 NV_LOCK_INIT(sc, "NetVSCLock"); 327 328 sc->hn_dev_obj = device_ctx; 329 330 ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER); 331 ifp->if_softc = sc; 332 333 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 334 ifp->if_dunit = unit; 335 ifp->if_dname = NETVSC_DEVNAME; 336 337 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 338 ifp->if_ioctl = hn_ioctl; 339 ifp->if_start = hn_start; 340 ifp->if_init = hn_ifinit; 341 /* needed by hv_rf_on_device_add() code */ 342 ifp->if_mtu = ETHERMTU; 343 IFQ_SET_MAXLEN(&ifp->if_snd, 512); 344 ifp->if_snd.ifq_drv_maxlen = 511; 345 IFQ_SET_READY(&ifp->if_snd); 346 347 /* 348 * Tell upper layers that we support full VLAN capability. 349 */ 350 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); 351 ifp->if_capabilities |= 352 IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO; 353 ifp->if_capenable |= 354 IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO; 355 /* 356 * Only enable UDP checksum offloading when it is on 2012R2 or 357 * later. UDP checksum offloading doesn't work on earlier 358 * Windows releases. 
359 */ 360 if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1) 361 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 362 else 363 ifp->if_hwassist = CSUM_TCP | CSUM_TSO; 364 365 ret = hv_rf_on_device_add(device_ctx, &device_info); 366 if (ret != 0) { 367 if_free(ifp); 368 369 return (ret); 370 } 371 if (device_info.link_state == 0) { 372 sc->hn_carrier = 1; 373 } 374 375 ether_ifattach(ifp, device_info.mac_addr); 376 377 return (0); 378} 379 380/* 381 * Standard detach entry point 382 */ 383static int 384netvsc_detach(device_t dev) 385{ 386 struct hv_device *hv_device = vmbus_get_devctx(dev); 387 388 if (bootverbose) 389 printf("netvsc_detach\n"); 390 391 /* 392 * XXXKYS: Need to clean up all our 393 * driver state; this is the driver 394 * unloading. 395 */ 396 397 /* 398 * XXXKYS: Need to stop outgoing traffic and unregister 399 * the netdevice. 400 */ 401 402 hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL); 403 404 return (0); 405} 406 407/* 408 * Standard shutdown entry point 409 */ 410static int 411netvsc_shutdown(device_t dev) 412{ 413 return (0); 414} 415 416/* 417 * Send completion processing 418 * 419 * Note: It looks like offset 0 of buf is reserved to hold the softc 420 * pointer. The sc pointer is not currently needed in this function, and 421 * it is not presently populated by the TX function. 
422 */ 423void 424netvsc_xmit_completion(void *context) 425{ 426 netvsc_packet *packet = (netvsc_packet *)context; 427 struct mbuf *mb; 428 uint8_t *buf; 429 430 mb = (struct mbuf *)(uintptr_t)packet->compl.send.send_completion_tid; 431 buf = ((uint8_t *)packet) - HV_NV_PACKET_OFFSET_IN_BUF; 432 433 free(buf, M_NETVSC); 434 435 if (mb != NULL) { 436 m_freem(mb); 437 } 438} 439 440/* 441 * Start a transmit of one or more packets 442 */ 443static int 444hn_start_locked(struct ifnet *ifp) 445{ 446 hn_softc_t *sc = ifp->if_softc; 447 struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); 448 netvsc_dev *net_dev = sc->net_dev; 449 device_t dev = device_ctx->device; 450 uint8_t *buf; 451 netvsc_packet *packet; 452 struct mbuf *m_head, *m; 453 struct mbuf *mc_head = NULL; 454 struct ether_vlan_header *eh; 455 rndis_msg *rndis_mesg; 456 rndis_packet *rndis_pkt; 457 rndis_per_packet_info *rppi; 458 ndis_8021q_info *rppi_vlan_info; 459 rndis_tcp_ip_csum_info *csum_info; 460 rndis_tcp_tso_info *tso_info; 461 int ether_len; 462 int i; 463 int num_frags; 464 int len; 465 int retries = 0; 466 int ret = 0; 467 uint32_t rndis_msg_size = 0; 468 uint32_t trans_proto_type; 469 uint32_t send_buf_section_idx = 470 NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; 471 472 while (!IFQ_DRV_IS_EMPTY(&sc->hn_ifp->if_snd)) { 473 IFQ_DRV_DEQUEUE(&sc->hn_ifp->if_snd, m_head); 474 if (m_head == NULL) { 475 break; 476 } 477 478 len = 0; 479 num_frags = 0; 480 481 /* Walk the mbuf list computing total length and num frags */ 482 for (m = m_head; m != NULL; m = m->m_next) { 483 if (m->m_len != 0) { 484 num_frags++; 485 len += m->m_len; 486 } 487 } 488 489 /* 490 * Reserve the number of pages requested. 
Currently, 491 * one page is reserved for the message in the RNDIS 492 * filter packet 493 */ 494 num_frags += HV_RF_NUM_TX_RESERVED_PAGE_BUFS; 495 496 /* If exceeds # page_buffers in netvsc_packet */ 497 if (num_frags > NETVSC_PACKET_MAXPAGE) { 498 device_printf(dev, "exceed max page buffers,%d,%d\n", 499 num_frags, NETVSC_PACKET_MAXPAGE); 500 m_freem(m_head); 501 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 502 return (EINVAL); 503 } 504 505 /* 506 * Allocate a buffer with space for a netvsc packet plus a 507 * number of reserved areas. First comes a (currently 16 508 * bytes, currently unused) reserved data area. Second is 509 * the netvsc_packet. Third is an area reserved for an 510 * rndis_filter_packet struct. Fourth (optional) is a 511 * rndis_per_packet_info struct. 512 * Changed malloc to M_NOWAIT to avoid sleep under spin lock. 513 * No longer reserving extra space for page buffers, as they 514 * are already part of the netvsc_packet. 515 */ 516 buf = malloc(HV_NV_PACKET_OFFSET_IN_BUF + 517 sizeof(netvsc_packet) + 518 sizeof(rndis_msg) + 519 RNDIS_VLAN_PPI_SIZE + 520 RNDIS_TSO_PPI_SIZE + 521 RNDIS_CSUM_PPI_SIZE, 522 M_NETVSC, M_ZERO | M_NOWAIT); 523 if (buf == NULL) { 524 device_printf(dev, "hn:malloc packet failed\n"); 525 m_freem(m_head); 526 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 527 return (ENOMEM); 528 } 529 530 packet = (netvsc_packet *)(buf + HV_NV_PACKET_OFFSET_IN_BUF); 531 *(vm_offset_t *)buf = HV_NV_SC_PTR_OFFSET_IN_BUF; 532 533 packet->is_data_pkt = TRUE; 534 535 /* Set up the rndis header */ 536 packet->page_buf_count = num_frags; 537 538 /* Initialize it from the mbuf */ 539 packet->tot_data_buf_len = len; 540 541 /* 542 * extension points to the area reserved for the 543 * rndis_filter_packet, which is placed just after 544 * the netvsc_packet (and rppi struct, if present; 545 * length is updated later). 
546 */ 547 packet->rndis_mesg = packet + 1; 548 rndis_mesg = (rndis_msg *)packet->rndis_mesg; 549 rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG; 550 551 rndis_pkt = &rndis_mesg->msg.packet; 552 rndis_pkt->data_offset = sizeof(rndis_packet); 553 rndis_pkt->data_length = packet->tot_data_buf_len; 554 rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet); 555 556 rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet); 557 558 /* 559 * If the Hyper-V infrastructure needs to embed a VLAN tag, 560 * initialize netvsc_packet and rppi struct values as needed. 561 */ 562 if (m_head->m_flags & M_VLANTAG) { 563 /* 564 * set up some additional fields so the Hyper-V infrastructure will stuff the VLAN tag 565 * into the frame. 566 */ 567 packet->vlan_tci = m_head->m_pkthdr.ether_vtag; 568 569 rndis_msg_size += RNDIS_VLAN_PPI_SIZE; 570 571 rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE, 572 ieee_8021q_info); 573 574 /* VLAN info immediately follows rppi struct */ 575 rppi_vlan_info = (ndis_8021q_info *)((char*)rppi + 576 rppi->per_packet_info_offset); 577 /* FreeBSD does not support CFI or priority */ 578 rppi_vlan_info->u1.s1.vlan_id = 579 packet->vlan_tci & 0xfff; 580 } 581 582 /* Only check the flags for outbound and ignore the ones for inbound */ 583 if (0 == (m_head->m_pkthdr.csum_flags & HV_CSUM_FOR_OUTBOUND)) { 584 goto pre_send; 585 } 586 587 eh = mtod(m_head, struct ether_vlan_header*); 588 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 589 ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 590 } else { 591 ether_len = ETHER_HDR_LEN; 592 } 593 594 trans_proto_type = get_transport_proto_type(m_head); 595 if (TRANSPORT_TYPE_NOT_IP == trans_proto_type) { 596 goto pre_send; 597 } 598 599 /* 600 * TSO packet needless to setup the send side checksum 601 * offload. 
602 */ 603 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 604 goto do_tso; 605 } 606 607 /* setup checksum offload */ 608 rndis_msg_size += RNDIS_CSUM_PPI_SIZE; 609 rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE, 610 tcpip_chksum_info); 611 csum_info = (rndis_tcp_ip_csum_info *)((char*)rppi + 612 rppi->per_packet_info_offset); 613 614 if (trans_proto_type & (TYPE_IPV4 << 16)) { 615 csum_info->xmit.is_ipv4 = 1; 616 } else { 617 csum_info->xmit.is_ipv6 = 1; 618 } 619 620 if (trans_proto_type & TYPE_TCP) { 621 csum_info->xmit.tcp_csum = 1; 622 csum_info->xmit.tcp_header_offset = 0; 623 } else if (trans_proto_type & TYPE_UDP) { 624 csum_info->xmit.udp_csum = 1; 625 } 626 627 goto pre_send; 628 629do_tso: 630 /* setup TCP segmentation offload */ 631 rndis_msg_size += RNDIS_TSO_PPI_SIZE; 632 rppi = hv_set_rppi_data(rndis_mesg, RNDIS_TSO_PPI_SIZE, 633 tcp_large_send_info); 634 635 tso_info = (rndis_tcp_tso_info *)((char *)rppi + 636 rppi->per_packet_info_offset); 637 tso_info->lso_v2_xmit.type = 638 RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; 639 640#ifdef INET 641 if (trans_proto_type & (TYPE_IPV4 << 16)) { 642 struct ip *ip = 643 (struct ip *)(m_head->m_data + ether_len); 644 unsigned long iph_len = ip->ip_hl << 2; 645 struct tcphdr *th = 646 (struct tcphdr *)((caddr_t)ip + iph_len); 647 648 tso_info->lso_v2_xmit.ip_version = 649 RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; 650 ip->ip_len = 0; 651 ip->ip_sum = 0; 652 653 th->th_sum = in_pseudo(ip->ip_src.s_addr, 654 ip->ip_dst.s_addr, 655 htons(IPPROTO_TCP)); 656 } 657#endif 658#if defined(INET6) && defined(INET) 659 else 660#endif 661#ifdef INET6 662 { 663 struct ip6_hdr *ip6 = 664 (struct ip6_hdr *)(m_head->m_data + ether_len); 665 struct tcphdr *th = (struct tcphdr *)(ip6 + 1); 666 667 tso_info->lso_v2_xmit.ip_version = 668 RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; 669 ip6->ip6_plen = 0; 670 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 671 } 672#endif 673 tso_info->lso_v2_xmit.tcp_header_offset = 0; 674 
tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz; 675 676pre_send: 677 rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size; 678 packet->tot_data_buf_len = rndis_mesg->msg_len; 679 680 /* send packet with send buffer */ 681 if (packet->tot_data_buf_len < net_dev->send_section_size) { 682 send_buf_section_idx = 683 hv_nv_get_next_send_section(net_dev); 684 if (send_buf_section_idx != 685 NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { 686 char *dest = ((char *)net_dev->send_buf + 687 send_buf_section_idx * 688 net_dev->send_section_size); 689 690 memcpy(dest, rndis_mesg, rndis_msg_size); 691 dest += rndis_msg_size; 692 for (m = m_head; m != NULL; m = m->m_next) { 693 if (m->m_len) { 694 memcpy(dest, 695 (void *)mtod(m, vm_offset_t), 696 m->m_len); 697 dest += m->m_len; 698 } 699 } 700 701 packet->send_buf_section_idx = 702 send_buf_section_idx; 703 packet->send_buf_section_size = 704 packet->tot_data_buf_len; 705 packet->page_buf_count = 0; 706 goto do_send; 707 } 708 } 709 710 /* send packet with page buffer */ 711 packet->page_buffers[0].pfn = 712 atop(hv_get_phys_addr(rndis_mesg)); 713 packet->page_buffers[0].offset = 714 (unsigned long)rndis_mesg & PAGE_MASK; 715 packet->page_buffers[0].length = rndis_msg_size; 716 717 /* 718 * Fill the page buffers with mbuf info starting at index 719 * HV_RF_NUM_TX_RESERVED_PAGE_BUFS. 720 */ 721 i = HV_RF_NUM_TX_RESERVED_PAGE_BUFS; 722 for (m = m_head; m != NULL; m = m->m_next) { 723 if (m->m_len) { 724 vm_offset_t paddr = 725 vtophys(mtod(m, vm_offset_t)); 726 packet->page_buffers[i].pfn = 727 paddr >> PAGE_SHIFT; 728 packet->page_buffers[i].offset = 729 paddr & (PAGE_SIZE - 1); 730 packet->page_buffers[i].length = m->m_len; 731 i++; 732 } 733 } 734 735 packet->send_buf_section_idx = 736 NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; 737 packet->send_buf_section_size = 0; 738 739do_send: 740 741 /* 742 * If bpf, copy the mbuf chain. 
This is less expensive than 743 * it appears; the mbuf clusters are not copied, only their 744 * reference counts are incremented. 745 * Needed to avoid a race condition where the completion 746 * callback is invoked, freeing the mbuf chain, before the 747 * bpf_mtap code has a chance to run. 748 */ 749 if (ifp->if_bpf) { 750 mc_head = m_copypacket(m_head, M_DONTWAIT); 751 } 752retry_send: 753 /* Set the completion routine */ 754 packet->compl.send.on_send_completion = netvsc_xmit_completion; 755 packet->compl.send.send_completion_context = packet; 756 packet->compl.send.send_completion_tid = (uint64_t)(uintptr_t)m_head; 757 758 /* Removed critical_enter(), does not appear necessary */ 759 ret = hv_nv_on_send(device_ctx, packet); 760 if (ret == 0) { 761 ifp->if_opackets++; 762 /* if bpf && mc_head, call bpf_mtap code */ 763 if (mc_head) { 764 ETHER_BPF_MTAP(ifp, mc_head); 765 } 766 } else { 767 retries++; 768 if (retries < 4) { 769 goto retry_send; 770 } 771 772 IF_PREPEND(&ifp->if_snd, m_head); 773 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 774 775 /* 776 * Null the mbuf pointer so the completion function 777 * does not free the mbuf chain. We just pushed the 778 * mbuf chain back on the if_snd queue. 
779 */ 780 packet->compl.send.send_completion_tid = 0; 781 782 /* 783 * Release the resources since we will not get any 784 * send completion 785 */ 786 netvsc_xmit_completion(packet); 787 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 788 } 789 790 /* if bpf && mc_head, free the mbuf chain copy */ 791 if (mc_head) { 792 m_freem(mc_head); 793 } 794 } 795 796 return (ret); 797} 798 799/* 800 * Link up/down notification 801 */ 802void 803netvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status) 804{ 805 hn_softc_t *sc = device_get_softc(device_obj->device); 806 807 if (sc == NULL) { 808 return; 809 } 810 811 if (status == 1) { 812 sc->hn_carrier = 1; 813 } else { 814 sc->hn_carrier = 0; 815 } 816} 817 818/* 819 * Append the specified data to the indicated mbuf chain, 820 * Extend the mbuf chain if the new data does not fit in 821 * existing space. 822 * 823 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. 824 * There should be an equivalent in the kernel mbuf code, 825 * but there does not appear to be one yet. 826 * 827 * Differs from m_append() in that additional mbufs are 828 * allocated with cluster size MJUMPAGESIZE, and filled 829 * accordingly. 830 * 831 * Return 1 if able to complete the job; otherwise 0. 832 */ 833static int 834hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) 835{ 836 struct mbuf *m, *n; 837 int remainder, space; 838 839 for (m = m0; m->m_next != NULL; m = m->m_next) 840 ; 841 remainder = len; 842 space = M_TRAILINGSPACE(m); 843 if (space > 0) { 844 /* 845 * Copy into available space. 846 */ 847 if (space > remainder) 848 space = remainder; 849 bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 850 m->m_len += space; 851 cp += space; 852 remainder -= space; 853 } 854 while (remainder > 0) { 855 /* 856 * Allocate a new mbuf; could check space 857 * and allocate a cluster instead. 
858 */ 859 n = m_getjcl(M_DONTWAIT, m->m_type, 0, MJUMPAGESIZE); 860 if (n == NULL) 861 break; 862 n->m_len = min(MJUMPAGESIZE, remainder); 863 bcopy(cp, mtod(n, caddr_t), n->m_len); 864 cp += n->m_len; 865 remainder -= n->m_len; 866 m->m_next = n; 867 m = n; 868 } 869 if (m0->m_flags & M_PKTHDR) 870 m0->m_pkthdr.len += len - remainder; 871 872 return (remainder == 0); 873} 874 875 876/* 877 * Called when we receive a data packet from the "wire" on the 878 * specified device 879 * 880 * Note: This is no longer used as a callback 881 */ 882int 883netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, 884 rndis_tcp_ip_csum_info *csum_info) 885{ 886 hn_softc_t *sc = (hn_softc_t *)device_get_softc(device_ctx->device); 887 struct mbuf *m_new; 888 struct ifnet *ifp; 889 device_t dev = device_ctx->device; 890 int size; 891 892 if (sc == NULL) { 893 return (0); /* TODO: KYS how can this be! */ 894 } 895 896 ifp = sc->hn_ifp; 897 898 ifp = sc->arpcom.ac_ifp; 899 900 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 901 return (0); 902 } 903 904 /* 905 * Bail out if packet contains more data than configured MTU. 906 */ 907 if (packet->tot_data_buf_len > (ifp->if_mtu + ETHER_HDR_LEN)) { 908 return (0); 909 } 910 911 /* 912 * Get an mbuf with a cluster. For packets 2K or less, 913 * get a standard 2K cluster. For anything larger, get a 914 * 4K cluster. Any buffers larger than 4K can cause problems 915 * if looped around to the Hyper-V TX channel, so avoid them. 
916 */ 917 size = MCLBYTES; 918 919 if (packet->tot_data_buf_len > MCLBYTES) { 920 /* 4096 */ 921 size = MJUMPAGESIZE; 922 } 923 924 m_new = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, size); 925 926 if (m_new == NULL) { 927 device_printf(dev, "alloc mbuf failed.\n"); 928 return (0); 929 } 930 931 hv_m_append(m_new, packet->tot_data_buf_len, 932 packet->data); 933 934 m_new->m_pkthdr.rcvif = ifp; 935 936 /* receive side checksum offload */ 937 m_new->m_pkthdr.csum_flags = 0; 938 if (NULL != csum_info) { 939 /* IP csum offload */ 940 if (csum_info->receive.ip_csum_succeeded) { 941 m_new->m_pkthdr.csum_flags |= 942 (CSUM_IP_CHECKED | CSUM_IP_VALID); 943 } 944 945 /* TCP csum offload */ 946 if (csum_info->receive.tcp_csum_succeeded) { 947 m_new->m_pkthdr.csum_flags |= 948 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 949 m_new->m_pkthdr.csum_data = 0xffff; 950 } 951 } 952 953 if ((packet->vlan_tci != 0) && 954 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { 955 m_new->m_pkthdr.ether_vtag = packet->vlan_tci; 956 m_new->m_flags |= M_VLANTAG; 957 } 958 959 /* 960 * Note: Moved RX completion back to hv_nv_on_receive() so all 961 * messages (not just data messages) will trigger a response. 962 */ 963 964 ifp->if_ipackets++; 965 966 /* We're not holding the lock here, so don't release it */ 967 (*ifp->if_input)(ifp, m_new); 968 969 return (0); 970} 971 972/* 973 * Rules for using sc->temp_unusable: 974 * 1. sc->temp_unusable can only be read or written while holding NV_LOCK() 975 * 2. code reading sc->temp_unusable under NV_LOCK(), and finding 976 * sc->temp_unusable set, must release NV_LOCK() and exit 977 * 3. to retain exclusive control of the interface, 978 * sc->temp_unusable must be set by code before releasing NV_LOCK() 979 * 4. only code setting sc->temp_unusable can clear sc->temp_unusable 980 * 5. code setting sc->temp_unusable must eventually clear sc->temp_unusable 981 */ 982 983/* 984 * Standard ioctl entry point. 
Called when the user wants to configure 985 * the interface. 986 */ 987static int 988hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 989{ 990 hn_softc_t *sc = ifp->if_softc; 991 struct ifreq *ifr = (struct ifreq *)data; 992#ifdef INET 993 struct ifaddr *ifa = (struct ifaddr *)data; 994#endif 995 netvsc_device_info device_info; 996 struct hv_device *hn_dev; 997 int mask, error = 0; 998 int retry_cnt = 500; 999 1000 switch(cmd) { 1001 1002 case SIOCSIFADDR: 1003#ifdef INET 1004 if (ifa->ifa_addr->sa_family == AF_INET) { 1005 ifp->if_flags |= IFF_UP; 1006 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 1007 hn_ifinit(sc); 1008 arp_ifinit(ifp, ifa); 1009 } else 1010#endif 1011 error = ether_ioctl(ifp, cmd, data); 1012 break; 1013 case SIOCSIFMTU: 1014 hn_dev = vmbus_get_devctx(sc->hn_dev); 1015 1016 /* Check MTU value change */ 1017 if (ifp->if_mtu == ifr->ifr_mtu) 1018 break; 1019 1020 if (ifr->ifr_mtu > NETVSC_MAX_CONFIGURABLE_MTU) { 1021 error = EINVAL; 1022 break; 1023 } 1024 1025 /* Obtain and record requested MTU */ 1026 ifp->if_mtu = ifr->ifr_mtu; 1027 1028 do { 1029 NV_LOCK(sc); 1030 if (!sc->temp_unusable) { 1031 sc->temp_unusable = TRUE; 1032 retry_cnt = -1; 1033 } 1034 NV_UNLOCK(sc); 1035 if (retry_cnt > 0) { 1036 retry_cnt--; 1037 DELAY(5 * 1000); 1038 } 1039 } while (retry_cnt > 0); 1040 1041 if (retry_cnt == 0) { 1042 error = EINVAL; 1043 break; 1044 } 1045 1046 /* We must remove and add back the device to cause the new 1047 * MTU to take effect. This includes tearing down, but not 1048 * deleting the channel, then bringing it back up. 
1049 */ 1050 error = hv_rf_on_device_remove(hn_dev, HV_RF_NV_RETAIN_CHANNEL); 1051 if (error) { 1052 NV_LOCK(sc); 1053 sc->temp_unusable = FALSE; 1054 NV_UNLOCK(sc); 1055 break; 1056 } 1057 error = hv_rf_on_device_add(hn_dev, &device_info); 1058 if (error) { 1059 NV_LOCK(sc); 1060 sc->temp_unusable = FALSE; 1061 NV_UNLOCK(sc); 1062 break; 1063 } 1064 1065 hn_ifinit_locked(sc); 1066 1067 NV_LOCK(sc); 1068 sc->temp_unusable = FALSE; 1069 NV_UNLOCK(sc); 1070 break; 1071 case SIOCSIFFLAGS: 1072 do { 1073 NV_LOCK(sc); 1074 if (!sc->temp_unusable) { 1075 sc->temp_unusable = TRUE; 1076 retry_cnt = -1; 1077 } 1078 NV_UNLOCK(sc); 1079 if (retry_cnt > 0) { 1080 retry_cnt--; 1081 DELAY(5 * 1000); 1082 } 1083 } while (retry_cnt > 0); 1084 1085 if (retry_cnt == 0) { 1086 error = EINVAL; 1087 break; 1088 } 1089 1090 if (ifp->if_flags & IFF_UP) { 1091 /* 1092 * If only the state of the PROMISC flag changed, 1093 * then just use the 'set promisc mode' command 1094 * instead of reinitializing the entire NIC. Doing 1095 * a full re-init means reloading the firmware and 1096 * waiting for it to start up, which may take a 1097 * second or two. 1098 */ 1099#ifdef notyet 1100 /* Fixme: Promiscuous mode? 
*/ 1101 if (ifp->if_drv_flags & IFF_DRV_RUNNING && 1102 ifp->if_flags & IFF_PROMISC && 1103 !(sc->hn_if_flags & IFF_PROMISC)) { 1104 /* do something here for Hyper-V */ 1105 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && 1106 !(ifp->if_flags & IFF_PROMISC) && 1107 sc->hn_if_flags & IFF_PROMISC) { 1108 /* do something here for Hyper-V */ 1109 } else 1110#endif 1111 hn_ifinit_locked(sc); 1112 } else { 1113 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1114 hn_stop(sc); 1115 } 1116 } 1117 NV_LOCK(sc); 1118 sc->temp_unusable = FALSE; 1119 NV_UNLOCK(sc); 1120 sc->hn_if_flags = ifp->if_flags; 1121 error = 0; 1122 break; 1123 case SIOCSIFCAP: 1124 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 1125 if (mask & IFCAP_TXCSUM) { 1126 if (IFCAP_TXCSUM & ifp->if_capenable) { 1127 ifp->if_capenable &= ~IFCAP_TXCSUM; 1128 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 1129 } else { 1130 ifp->if_capenable |= IFCAP_TXCSUM; 1131 /* 1132 * Only enable UDP checksum offloading on 1133 * Windows Server 2012R2 or later releases. 1134 */ 1135 if (hv_vmbus_protocal_version >= 1136 HV_VMBUS_VERSION_WIN8_1) { 1137 ifp->if_hwassist |= 1138 (CSUM_TCP | CSUM_UDP); 1139 } else { 1140 ifp->if_hwassist |= CSUM_TCP; 1141 } 1142 } 1143 } 1144 1145 if (mask & IFCAP_RXCSUM) { 1146 if (IFCAP_RXCSUM & ifp->if_capenable) { 1147 ifp->if_capenable &= ~IFCAP_RXCSUM; 1148 } else { 1149 ifp->if_capenable |= IFCAP_RXCSUM; 1150 } 1151 } 1152 1153 if (mask & IFCAP_TSO4) { 1154 ifp->if_capenable ^= IFCAP_TSO4; 1155 ifp->if_hwassist ^= CSUM_IP_TSO; 1156 } 1157 1158 if (mask & IFCAP_TSO6) { 1159 ifp->if_capenable ^= IFCAP_TSO6; 1160 ifp->if_hwassist ^= CSUM_IP6_TSO; 1161 } 1162 1163 error = 0; 1164 break; 1165 case SIOCADDMULTI: 1166 case SIOCDELMULTI: 1167#ifdef notyet 1168 /* Fixme: Multicast mode? 
*/ 1169 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1170 NV_LOCK(sc); 1171 netvsc_setmulti(sc); 1172 NV_UNLOCK(sc); 1173 error = 0; 1174 } 1175#endif 1176 /* FALLTHROUGH */ 1177 case SIOCSIFMEDIA: 1178 case SIOCGIFMEDIA: 1179 error = EINVAL; 1180 break; 1181 default: 1182 error = ether_ioctl(ifp, cmd, data); 1183 break; 1184 } 1185 1186 return (error); 1187} 1188 1189/* 1190 * 1191 */ 1192static void 1193hn_stop(hn_softc_t *sc) 1194{ 1195 struct ifnet *ifp; 1196 int ret; 1197 struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); 1198 1199 ifp = sc->hn_ifp; 1200 1201 if (bootverbose) 1202 printf(" Closing Device ...\n"); 1203 1204 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 1205 if_link_state_change(ifp, LINK_STATE_DOWN); 1206 sc->hn_initdone = 0; 1207 1208 ret = hv_rf_on_close(device_ctx); 1209} 1210 1211/* 1212 * FreeBSD transmit entry point 1213 */ 1214static void 1215hn_start(struct ifnet *ifp) 1216{ 1217 hn_softc_t *sc; 1218 1219 sc = ifp->if_softc; 1220 NV_LOCK(sc); 1221 if (sc->temp_unusable) { 1222 NV_UNLOCK(sc); 1223 return; 1224 } 1225 hn_start_locked(ifp); 1226 NV_UNLOCK(sc); 1227} 1228 1229/* 1230 * 1231 */ 1232static void 1233hn_ifinit_locked(hn_softc_t *sc) 1234{ 1235 struct ifnet *ifp; 1236 struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); 1237 int ret; 1238 1239 ifp = sc->hn_ifp; 1240 1241 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1242 return; 1243 } 1244 1245 hv_promisc_mode = 1; 1246 1247 ret = hv_rf_on_open(device_ctx); 1248 if (ret != 0) { 1249 return; 1250 } else { 1251 sc->hn_initdone = 1; 1252 } 1253 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1254 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1255 if_link_state_change(ifp, LINK_STATE_UP); 1256} 1257 1258/* 1259 * 1260 */ 1261static void 1262hn_ifinit(void *xsc) 1263{ 1264 hn_softc_t *sc = xsc; 1265 1266 NV_LOCK(sc); 1267 if (sc->temp_unusable) { 1268 NV_UNLOCK(sc); 1269 return; 1270 } 1271 sc->temp_unusable = TRUE; 1272 NV_UNLOCK(sc); 1273 1274 hn_ifinit_locked(sc); 1275 
1276 NV_LOCK(sc); 1277 sc->temp_unusable = FALSE; 1278 NV_UNLOCK(sc); 1279} 1280 1281#ifdef LATER 1282/* 1283 * 1284 */ 1285static void 1286hn_watchdog(struct ifnet *ifp) 1287{ 1288 hn_softc_t *sc; 1289 sc = ifp->if_softc; 1290 1291 printf("hn%d: watchdog timeout -- resetting\n", sc->hn_unit); 1292 hn_ifinit(sc); /*???*/ 1293 ifp->if_oerrors++; 1294} 1295#endif 1296 1297static device_method_t netvsc_methods[] = { 1298 /* Device interface */ 1299 DEVMETHOD(device_probe, netvsc_probe), 1300 DEVMETHOD(device_attach, netvsc_attach), 1301 DEVMETHOD(device_detach, netvsc_detach), 1302 DEVMETHOD(device_shutdown, netvsc_shutdown), 1303 1304 { 0, 0 } 1305}; 1306 1307static driver_t netvsc_driver = { 1308 NETVSC_DEVNAME, 1309 netvsc_methods, 1310 sizeof(hn_softc_t) 1311}; 1312 1313static devclass_t netvsc_devclass; 1314 1315DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0); 1316MODULE_VERSION(hn, 1); 1317MODULE_DEPEND(hn, vmbus, 1, 1, 1); 1318SYSINIT(netvsc_initx, SI_SUB_KTHREAD_IDLE, SI_ORDER_MIDDLE + 1, netvsc_init, 1319 NULL); 1320 1321