/*-
 * Copyright (c) 2010-2012 Citrix Inc.
 * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 2004-2006 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hn.h"
#include "opt_inet6.h"
#include "opt_inet.h"
#include "opt_rss.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/counter.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/buf_ring.h>
#include <sys/eventhandler.h>

#include <machine/atomic.h>
#include <machine/in_cksum.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/rndis.h>
#ifdef RSS
#include <net/rss_config.h>
#endif

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/include/hyperv_busdma.h>
#include <dev/hyperv/include/vmbus.h>
#include <dev/hyperv/include/vmbus_xact.h>

#include <dev/hyperv/netvsc/ndis.h>
#include <dev/hyperv/netvsc/if_hnreg.h>
#include <dev/hyperv/netvsc/if_hnvar.h>
#include <dev/hyperv/netvsc/hn_nvs.h>
#include <dev/hyperv/netvsc/hn_rndis.h>

#include "vmbus_if.h"

#define HN_IFSTART_SUPPORT

#define HN_RING_CNT_DEF_MAX		8

#define HN_VFMAP_SIZE_DEF		8

#define HN_XPNT_VF_ATTWAIT_MIN		2	/* seconds */

/* YYY should get it from the underlying channel */
#define HN_TX_DESC_CNT			512

#define HN_RNDIS_PKT_LEN \
    (sizeof(struct rndis_packet_msg) + \
     HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \
     HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \
     HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \
     HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
#define HN_RNDIS_PKT_BOUNDARY		PAGE_SIZE
#define HN_RNDIS_PKT_ALIGN		CACHE_LINE_SIZE

#define HN_TX_DATA_BOUNDARY		PAGE_SIZE
#define HN_TX_DATA_MAXSIZE		IP_MAXPACKET
#define HN_TX_DATA_SEGSIZE		PAGE_SIZE
/* -1 for RNDIS packet message */
#define HN_TX_DATA_SEGCNT_MAX		(HN_GPACNT_MAX - 1)

#define HN_DIRECT_TX_SIZE_DEF		128

#define HN_EARLY_TXEOF_THRESH		8

#define HN_PKTBUF_LEN_DEF		(16 * 1024)

#define HN_LROENT_CNT_DEF		128

#define HN_LRO_LENLIM_MULTIRX_DEF	(12 * ETHERMTU)
#define HN_LRO_LENLIM_DEF		(25 * ETHERMTU)
/* YYY 2*MTU is a bit rough, but should be good enough. */
#define HN_LRO_LENLIM_MIN(ifp)		(2 * (ifp)->if_mtu)

#define HN_LRO_ACKCNT_DEF		1

#define HN_LOCK_INIT(sc) \
    sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev))
#define HN_LOCK_DESTROY(sc)	sx_destroy(&(sc)->hn_lock)
#define HN_LOCK_ASSERT(sc)	sx_assert(&(sc)->hn_lock, SA_XLOCKED)
#define HN_LOCK(sc) \
do { \
    while (sx_try_xlock(&(sc)->hn_lock) == 0) { \
        /* Relinquish cpu to avoid deadlock */ \
        sched_relinquish(curthread); \
        DELAY(1000); \
    } \
} while (0)
#define HN_UNLOCK(sc)		sx_xunlock(&(sc)->hn_lock)
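
/*
 * NOTE:
 * HN_LOCK spins on sx_try_xlock() with a yield plus a 1ms delay
 * instead of blocking in sx_xlock(); per the in-macro comment the
 * busy-wait exists to avoid a deadlock, presumably against contexts
 * that cannot release the lock until this thread yields the CPU.
 */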

#define HN_CSUM_IP_MASK		(CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP)
#define HN_CSUM_IP6_MASK	(CSUM_IP6_TCP | CSUM_IP6_UDP)
#define HN_CSUM_IP_HWASSIST(sc) \
    ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK)
#define HN_CSUM_IP6_HWASSIST(sc) \
    ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)

#define HN_PKTSIZE_MIN(align) \
    roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \
        HN_RNDIS_PKT_LEN, (align))
#define HN_PKTSIZE(m, align) \
    roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align))

#ifdef RSS
#define HN_RING_IDX2CPU(sc, idx)	rss_getcpu((idx) % rss_getnumbuckets())
#else
#define HN_RING_IDX2CPU(sc, idx)	(((sc)->hn_cpu + (idx)) % mp_ncpus)
#endif

struct hn_txdesc {
#ifndef HN_USE_TXDESC_BUFRING
    SLIST_ENTRY(hn_txdesc) link;
#endif
    STAILQ_ENTRY(hn_txdesc) agg_link;

    /* Aggregated txdescs, in sending order. */
    STAILQ_HEAD(, hn_txdesc) agg_list;

    /* The oldest packet, if transmission aggregation happens. */
    struct mbuf *m;
    struct hn_tx_ring *txr;
    int refs;
    uint32_t flags;	/* HN_TXD_FLAG_ */
    struct hn_nvs_sendctx send_ctx;
    uint32_t chim_index;
    int chim_size;

    bus_dmamap_t data_dmap;

    bus_addr_t rndis_pkt_paddr;
    struct rndis_packet_msg *rndis_pkt;
    bus_dmamap_t rndis_pkt_dmap;
};

#define HN_TXD_FLAG_ONLIST		0x0001
#define HN_TXD_FLAG_DMAMAP		0x0002
#define HN_TXD_FLAG_ONAGG		0x0004

#define HN_NDIS_PKTINFO_SUBALLOC	0x01
#define HN_NDIS_PKTINFO_1ST_FRAG	0x02
#define HN_NDIS_PKTINFO_LAST_FRAG	0x04

struct packet_info_id {
    uint8_t ver;
    uint8_t flag;
    uint16_t pkt_id;
};

#define NDIS_PKTINFOID_SZ		sizeof(struct packet_info_id)

struct hn_rxinfo {
    const uint32_t *vlan_info;
    const uint32_t *csum_info;
    const uint32_t *hash_info;
    const uint32_t *hash_value;
    const struct packet_info_id *pktinfo_id;
};

struct hn_rxvf_setarg {
    struct hn_rx_ring *rxr;
    struct ifnet *vf_ifp;
};

#define HN_RXINFO_VLAN			0x0001
#define HN_RXINFO_CSUM			0x0002
#define HN_RXINFO_HASHINF		0x0004
#define HN_RXINFO_HASHVAL		0x0008
#define HN_RXINFO_PKTINFO_ID		0x0010
#define HN_RXINFO_ALL \
    (HN_RXINFO_VLAN | \
     HN_RXINFO_CSUM | \
     HN_RXINFO_HASHINF | \
     HN_RXINFO_HASHVAL | \
     HN_RXINFO_PKTINFO_ID)
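
/*
 * The HN_RXINFO_* bits above track which struct hn_rxinfo fields have
 * been collected while walking the RNDIS per-packet-info elements of
 * a received message, presumably so the parser can stop early once
 * HN_RXINFO_ALL is reached.  Each hn_rxinfo field points into the
 * received RNDIS message itself, which is why they are all const
 * pointers.
 */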

static int hn_probe(device_t);
static int hn_attach(device_t);
static int hn_detach(device_t);
static int hn_shutdown(device_t);
static void hn_chan_callback(struct vmbus_channel *, void *);

static void hn_init(void *);
static int hn_ioctl(struct ifnet *, u_long, caddr_t);
#ifdef HN_IFSTART_SUPPORT
static void hn_start(struct ifnet *);
#endif
static int hn_transmit(struct ifnet *, struct mbuf *);
static void hn_xmit_qflush(struct ifnet *);
static int hn_ifmedia_upd(struct ifnet *);
static void hn_ifmedia_sts(struct ifnet *, struct ifmediareq *);

static void hn_ifnet_event(void *, struct ifnet *, int);
static void hn_ifaddr_event(void *, struct ifnet *);
static void hn_ifnet_attevent(void *, struct ifnet *);
static void hn_ifnet_detevent(void *, struct ifnet *);
static void hn_ifnet_lnkevent(void *, struct ifnet *, int);

static bool hn_ismyvf(const struct hn_softc *, const struct ifnet *);
static void hn_rxvf_change(struct hn_softc *, struct ifnet *, bool);
static void hn_rxvf_set(struct hn_softc *, struct ifnet *);
static void hn_rxvf_set_task(void *, int);
static void hn_xpnt_vf_input(struct ifnet *, struct mbuf *);
static int hn_xpnt_vf_iocsetflags(struct hn_softc *);
static int hn_xpnt_vf_iocsetcaps(struct hn_softc *, struct ifreq *);
static void hn_xpnt_vf_saveifflags(struct hn_softc *);
static bool hn_xpnt_vf_isready(struct hn_softc *);
static void hn_xpnt_vf_setready(struct hn_softc *);
static void hn_xpnt_vf_init_taskfunc(void *, int);
static void hn_xpnt_vf_init(struct hn_softc *);
static void hn_xpnt_vf_setenable(struct hn_softc *);
static void hn_xpnt_vf_setdisable(struct hn_softc *, bool);
static void hn_vf_rss_fixup(struct hn_softc *, bool);
static void hn_vf_rss_restore(struct hn_softc *);

static int hn_rndis_rxinfo(const void *, int, struct hn_rxinfo *);
static void hn_rndis_rx_data(struct hn_rx_ring *, const void *, int);
static void hn_rndis_rx_status(struct hn_softc *, const void *, int);
static void hn_rndis_init_fixat(struct hn_softc *, int);

static void hn_nvs_handle_notify(struct hn_softc *,
    const struct vmbus_chanpkt_hdr *);
static void hn_nvs_handle_comp(struct hn_softc *, struct vmbus_channel *,
    const struct vmbus_chanpkt_hdr *);
static void hn_nvs_handle_rxbuf(struct hn_rx_ring *, struct vmbus_channel *,
    const struct vmbus_chanpkt_hdr *);
static void hn_nvs_ack_rxbuf(struct hn_rx_ring *, struct vmbus_channel *,
    uint64_t);

#if __FreeBSD_version >= 1100099
static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS);
#if __FreeBSD_version < 1100095
static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS);
#else
static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS);
#ifndef RSS
static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_vflist_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS);

static void hn_stop(struct hn_softc *, bool);
static void hn_init_locked(struct hn_softc *);
static int hn_chan_attach(struct hn_softc *, struct vmbus_channel *);
static void hn_chan_detach(struct hn_softc *, struct vmbus_channel *);
static int hn_attach_subchans(struct hn_softc *);
static void hn_detach_allchans(struct hn_softc *);
static void hn_chan_rollup(struct hn_rx_ring *, struct hn_tx_ring *);
static void hn_set_ring_inuse(struct hn_softc *, int);
static int hn_synth_attach(struct hn_softc *, int);
static void hn_synth_detach(struct hn_softc *);
static int hn_synth_alloc_subchans(struct hn_softc *, int *);
static bool hn_synth_attachable(const struct hn_softc *);
static void hn_suspend(struct hn_softc *);
static void hn_suspend_data(struct hn_softc *);
static void hn_suspend_mgmt(struct hn_softc *);
static void hn_resume(struct hn_softc *);
static void hn_resume_data(struct hn_softc *);
static void hn_resume_mgmt(struct hn_softc *);
static void hn_suspend_mgmt_taskfunc(void *, int);
static void hn_chan_drain(struct hn_softc *, struct vmbus_channel *);
static void hn_disable_rx(struct hn_softc *);
static void hn_drain_rxtx(struct hn_softc *, int);
static void hn_polling(struct hn_softc *, u_int);
static void hn_chan_polling(struct vmbus_channel *, u_int);
static void hn_mtu_change_fixup(struct hn_softc *);

static void hn_update_link_status(struct hn_softc *);
static void hn_change_network(struct hn_softc *);
static void hn_link_taskfunc(void *, int);
static void hn_netchg_init_taskfunc(void *, int);
static void hn_netchg_status_taskfunc(void *, int);
static void hn_link_status(struct hn_softc *);

static int hn_create_rx_data(struct hn_softc *, int);
static void hn_destroy_rx_data(struct hn_softc *);
static int hn_check_iplen(const struct mbuf *, int);
static void hn_rxpkt_proto(const struct mbuf *, int *, int *);
static int hn_set_rxfilter(struct hn_softc *, uint32_t);
static int hn_rxfilter_config(struct hn_softc *);
static int hn_rss_reconfig(struct hn_softc *);
static void hn_rss_ind_fixup(struct hn_softc *);
static void hn_rss_mbuf_hash(struct hn_softc *, uint32_t);
static int hn_rxpkt(struct hn_rx_ring *);
static uint32_t hn_rss_type_fromndis(uint32_t);
static uint32_t hn_rss_type_tondis(uint32_t);

static int hn_tx_ring_create(struct hn_softc *, int);
static void hn_tx_ring_destroy(struct hn_tx_ring *);
static int hn_create_tx_data(struct hn_softc *, int);
static void hn_fixup_tx_data(struct hn_softc *);
static void hn_fixup_rx_data(struct hn_softc *);
static void hn_destroy_tx_data(struct hn_softc *);
static void hn_txdesc_dmamap_destroy(struct hn_txdesc *);
static void hn_txdesc_gc(struct hn_tx_ring *, struct hn_txdesc *);
static int hn_encap(struct ifnet *, struct hn_tx_ring *,
    struct hn_txdesc *, struct mbuf **);
static int hn_txpkt(struct ifnet *, struct hn_tx_ring *,
    struct hn_txdesc *);
static void hn_set_chim_size(struct hn_softc *, int);
static void hn_set_tso_maxsize(struct hn_softc *, int, int);
static bool hn_tx_ring_pending(struct hn_tx_ring *);
static void hn_tx_ring_qflush(struct hn_tx_ring *);
static void hn_resume_tx(struct hn_softc *, int);
static void hn_set_txagg(struct hn_softc *);
static void *hn_try_txagg(struct ifnet *, struct hn_tx_ring *,
    struct hn_txdesc *, int);
static int hn_get_txswq_depth(const struct hn_tx_ring *);
static void hn_txpkt_done(struct hn_nvs_sendctx *, struct hn_softc *,
    struct vmbus_channel *, const void *, int);
static int hn_txpkt_sglist(struct hn_tx_ring *, struct hn_txdesc *);
static int hn_txpkt_chim(struct hn_tx_ring *, struct hn_txdesc *);
static int hn_xmit(struct hn_tx_ring *, int);
static void hn_xmit_taskfunc(void *, int);
static void hn_xmit_txeof(struct hn_tx_ring *);
static void hn_xmit_txeof_taskfunc(void *, int);
#ifdef HN_IFSTART_SUPPORT
static int hn_start_locked(struct hn_tx_ring *, int);
static void hn_start_taskfunc(void *, int);
static void hn_start_txeof(struct hn_tx_ring *);
static void hn_start_txeof_taskfunc(void *, int);
#endif

SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V network interface");

/* Trust tcp segment verification on host side. */
static int hn_trust_hosttcp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
    &hn_trust_hosttcp, 0,
    "Trust tcp segment verification on host side, "
    "when csum info is missing (global setting)");

/* Trust udp datagram verification on host side. */
static int hn_trust_hostudp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
    &hn_trust_hostudp, 0,
    "Trust udp datagram verification on host side, "
    "when csum info is missing (global setting)");

/* Trust ip packet verification on host side. */
static int hn_trust_hostip = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
    &hn_trust_hostip, 0,
    "Trust ip packet verification on host side, "
    "when csum info is missing (global setting)");

/*
 * Offload UDP/IPv4 checksum.
 */
static int hn_enable_udp4cs = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp4cs, CTLFLAG_RDTUN,
    &hn_enable_udp4cs, 0, "Offload UDP/IPv4 checksum");

/*
 * Offload UDP/IPv6 checksum.
 */
static int hn_enable_udp6cs = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp6cs, CTLFLAG_RDTUN,
    &hn_enable_udp6cs, 0, "Offload UDP/IPv6 checksum");

/* Stats. */
static counter_u64_t hn_udpcs_fixup;
SYSCTL_COUNTER_U64(_hw_hn, OID_AUTO, udpcs_fixup, CTLFLAG_RW,
    &hn_udpcs_fixup, "# of UDP checksum fixup");

/*
 * See hn_set_hlen().
 *
 * This value is for Azure.  For Hyper-V, set this above
 * 65536 to disable UDP datagram checksum fixup.
 */
static int hn_udpcs_fixup_mtu = 1420;
SYSCTL_INT(_hw_hn, OID_AUTO, udpcs_fixup_mtu, CTLFLAG_RWTUN,
    &hn_udpcs_fixup_mtu, 0, "UDP checksum fixup MTU threshold");
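
/*
 * Example (illustrative values): since udpcs_fixup_mtu is CTLFLAG_RWTUN,
 * it can be set either as a boot-time tunable or at runtime:
 *
 *   hw.hn.udpcs_fixup_mtu="65536"        (in /boot/loader.conf)
 *   # sysctl hw.hn.udpcs_fixup_mtu=1420  (at runtime)
 *
 * A threshold of 65536 can never be exceeded by a legal IP datagram,
 * which is how the "disable" advice in the comment above works.
 */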

/* Limit TSO burst size */
static int hn_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &hn_tso_maxlen, 0, "TSO burst limit");

/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
    &hn_tx_chimney_size, 0, "Chimney send packet size limit");

/* Limit the size of packet for direct transmission */
static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
    &hn_direct_tx_size, 0, "Size of the packet for direct transmission");

/* # of LRO entries per RX ring */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &hn_lro_entry_count, 0, "LRO entry count");
#endif
#endif

static int hn_tx_taskq_cnt = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN,
    &hn_tx_taskq_cnt, 0, "# of TX taskqueues");

#define HN_TX_TASKQ_M_INDEP	0
#define HN_TX_TASKQ_M_GLOBAL	1
#define HN_TX_TASKQ_M_EVTTQ	2

static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN,
    &hn_tx_taskq_mode, 0, "TX taskqueue modes: "
    "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs");

#ifndef HN_USE_TXDESC_BUFRING
static int hn_use_txdesc_bufring = 0;
#else
static int hn_use_txdesc_bufring = 1;
#endif
SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
    &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");

#ifdef HN_IFSTART_SUPPORT
/* Use ifnet.if_start instead of ifnet.if_transmit */
static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
    &hn_use_if_start, 0, "Use if_start TX method");
#endif

/* # of channels to use */
static int hn_chan_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
    &hn_chan_cnt, 0,
    "# of channels to use; each channel has one RX ring and one TX ring");

/* # of transmit rings to use */
static int hn_tx_ring_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
    &hn_tx_ring_cnt, 0, "# of TX rings to use");

/* Software TX ring depth */
static int hn_tx_swq_depth = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
    &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");

/* Enable sorted LRO, and the depth of the per-channel mbuf queue */
#if __FreeBSD_version >= 1100095
static u_int hn_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
#endif

/* Packet transmission aggregation size limit */
static int hn_tx_agg_size = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
    &hn_tx_agg_size, 0, "Packet transmission aggregation size limit");

/* Packet transmission aggregation count limit */
static int hn_tx_agg_pkts = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
    &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");
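
/*
 * NOTE:
 * For both aggregation limits above, -1 means "no administrative
 * limit"; hn_set_txagg() then clamps the effective values against
 * what the RNDIS device reports (hn_rndis_agg_size/hn_rndis_agg_pkts)
 * and against the chimney sending buffer size.
 */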

/* VF list */
SYSCTL_PROC(_hw_hn, OID_AUTO, vflist, CTLFLAG_RD | CTLTYPE_STRING,
    0, 0, hn_vflist_sysctl, "A", "VF list");

/* VF mapping */
SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, CTLFLAG_RD | CTLTYPE_STRING,
    0, 0, hn_vfmap_sysctl, "A", "VF mapping");

/* Transparent VF */
static int hn_xpnt_vf = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, vf_transparent, CTLFLAG_RDTUN,
    &hn_xpnt_vf, 0, "Transparent VF mode");

/* Accurate BPF support for Transparent VF */
static int hn_xpnt_vf_accbpf = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN,
    &hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF");

/* Extra wait for transparent VF attach routine; unit: seconds. */
static int hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN;
SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN,
    &hn_xpnt_vf_attwait, 0,
    "Extra wait for transparent VF attach routine; unit: seconds");

static u_int hn_cpu_index;	/* next CPU for channel */
static struct taskqueue **hn_tx_taskque;	/* shared TX taskqueues */

static struct rmlock hn_vfmap_lock;
static int hn_vfmap_size;
static struct ifnet **hn_vfmap;

#ifndef RSS
static const uint8_t
hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
    0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
    0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
    0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
    0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
    0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};
#endif	/* !RSS */

static const struct hyperv_guid hn_guid = {
    .hv_guid = {
        0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46,
        0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e }
};

static device_method_t hn_methods[] = {
    /* Device interface */
    DEVMETHOD(device_probe,	hn_probe),
    DEVMETHOD(device_attach,	hn_attach),
    DEVMETHOD(device_detach,	hn_detach),
    DEVMETHOD(device_shutdown,	hn_shutdown),
    DEVMETHOD_END
};

static driver_t hn_driver = {
    "hn",
    hn_methods,
    sizeof(struct hn_softc)
};

static devclass_t hn_devclass;

DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0);
MODULE_VERSION(hn, 1);
MODULE_DEPEND(hn, vmbus, 1, 1, 1);

#if __FreeBSD_version >= 1100099
static void
hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
{
    int i;

    for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
        sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
}
#endif

static int
hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

    KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
        txd->chim_size == 0, ("invalid rndis sglist txd"));
    return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA,
        &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt));
}

static int
hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
    struct hn_nvs_rndis rndis;

    KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID &&
        txd->chim_size > 0, ("invalid rndis chim txd"));

    rndis.nvs_type = HN_NVS_TYPE_RNDIS;
    rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA;
    rndis.nvs_chim_idx = txd->chim_index;
    rndis.nvs_chim_sz = txd->chim_size;

    return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC,
        &rndis, sizeof(rndis), &txd->send_ctx));
}

static __inline uint32_t
hn_chim_alloc(struct hn_softc *sc)
{
    int i, bmap_cnt = sc->hn_chim_bmap_cnt;
    u_long *bmap = sc->hn_chim_bmap;
    uint32_t ret = HN_NVS_CHIM_IDX_INVALID;

    for (i = 0; i < bmap_cnt; ++i) {
        int idx;
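
        /*
         * Find the lowest clear bit in this bitmap word: ffsl() of
         * the complemented word returns it 1-based, or 0 when the
         * word is fully allocated.  atomic_testandset_long() then
         * claims the bit; if another CPU raced us to it, fall
         * through to the next word instead of rescanning this one.
         */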
        idx = ffsl(~bmap[i]);
        if (idx == 0)
            continue;

        --idx; /* ffsl is 1-based */
        KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt,
            ("invalid i %d and idx %d", i, idx));

        if (atomic_testandset_long(&bmap[i], idx))
            continue;

        ret = i * LONG_BIT + idx;
        break;
    }
    return (ret);
}

static __inline void
hn_chim_free(struct hn_softc *sc, uint32_t chim_idx)
{
    u_long mask;
    uint32_t idx;

    idx = chim_idx / LONG_BIT;
    KASSERT(idx < sc->hn_chim_bmap_cnt,
        ("invalid chimney index 0x%x", chim_idx));

    mask = 1UL << (chim_idx % LONG_BIT);
    KASSERT(sc->hn_chim_bmap[idx] & mask,
        ("index bitmap 0x%lx, chimney index %u, "
         "bitmap idx %d, bitmask 0x%lx",
         sc->hn_chim_bmap[idx], chim_idx, idx, mask));

    atomic_clear_long(&sc->hn_chim_bmap[idx], mask);
}

#if defined(INET6) || defined(INET)

#define PULLUP_HDR(m, len) \
do { \
    if (__predict_false((m)->m_len < (len))) { \
        (m) = m_pullup((m), (len)); \
        if ((m) == NULL) \
            return (NULL); \
    } \
} while (0)

/*
 * NOTE: If this function fails, the m_head will have been freed.
 */
static __inline struct mbuf *
hn_tso_fixup(struct mbuf *m_head)
{
    struct ether_vlan_header *evl;
    struct tcphdr *th;
    int ehlen;

    KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable"));

    PULLUP_HDR(m_head, sizeof(*evl));
    evl = mtod(m_head, struct ether_vlan_header *);
    if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
        ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    else
        ehlen = ETHER_HDR_LEN;
    m_head->m_pkthdr.l2hlen = ehlen;

#ifdef INET
    if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
        struct ip *ip;
        int iphlen;

        PULLUP_HDR(m_head, ehlen + sizeof(*ip));
        ip = mtodo(m_head, ehlen);
        iphlen = ip->ip_hl << 2;
        m_head->m_pkthdr.l3hlen = iphlen;

        PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
        th = mtodo(m_head, ehlen + iphlen);

        ip->ip_len = 0;
        ip->ip_sum = 0;
        th->th_sum = in_pseudo(ip->ip_src.s_addr,
            ip->ip_dst.s_addr, htons(IPPROTO_TCP));
    }
#endif
#if defined(INET6) && defined(INET)
    else
#endif
#ifdef INET6
    {
        struct ip6_hdr *ip6;

        PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
        ip6 = mtodo(m_head, ehlen);
        if (ip6->ip6_nxt != IPPROTO_TCP) {
            m_freem(m_head);
            return (NULL);
        }
        m_head->m_pkthdr.l3hlen = sizeof(*ip6);

        PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
        th = mtodo(m_head, ehlen + sizeof(*ip6));

        ip6->ip6_plen = 0;
        th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
    }
#endif
    return (m_head);
}
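
/*
 * The fixup above is the usual LSO contract: the IP total length
 * (or IPv6 payload length) is zeroed and the TCP checksum is seeded
 * with the pseudo-header-only sum, so that the side performing the
 * segmentation can fill in the real per-segment values later.
 */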

/*
 * NOTE: If this function fails, the m_head will have been freed.
 */
static __inline struct mbuf *
hn_set_hlen(struct mbuf *m_head)
{
    const struct ether_vlan_header *evl;
    int ehlen;

    PULLUP_HDR(m_head, sizeof(*evl));
    evl = mtod(m_head, const struct ether_vlan_header *);
    if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
        ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    else
        ehlen = ETHER_HDR_LEN;
    m_head->m_pkthdr.l2hlen = ehlen;

#ifdef INET
    if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP_UDP)) {
        const struct ip *ip;
        int iphlen;

        PULLUP_HDR(m_head, ehlen + sizeof(*ip));
        ip = mtodo(m_head, ehlen);
        iphlen = ip->ip_hl << 2;
        m_head->m_pkthdr.l3hlen = iphlen;

        /*
         * UDP checksum offload does not work in Azure, if the
         * following conditions are met:
         * - sizeof(IP hdr + UDP hdr + payload) > 1420.
         * - IP_DF is not set in the IP hdr.
         *
         * Fall back to software checksum for these UDP datagrams.
         */
        if ((m_head->m_pkthdr.csum_flags & CSUM_IP_UDP) &&
            m_head->m_pkthdr.len > hn_udpcs_fixup_mtu + ehlen &&
            (ntohs(ip->ip_off) & IP_DF) == 0) {
            uint16_t off = ehlen + iphlen;

            counter_u64_add(hn_udpcs_fixup, 1);
            PULLUP_HDR(m_head, off + sizeof(struct udphdr));
            *(uint16_t *)(m_head->m_data + off +
                m_head->m_pkthdr.csum_data) = in_cksum_skip(
                m_head, m_head->m_pkthdr.len, off);
            m_head->m_pkthdr.csum_flags &= ~CSUM_IP_UDP;
        }
    }
#endif
#if defined(INET6) && defined(INET)
    else
#endif
#ifdef INET6
    {
        const struct ip6_hdr *ip6;

        PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
        ip6 = mtodo(m_head, ehlen);
        if (ip6->ip6_nxt != IPPROTO_TCP &&
            ip6->ip6_nxt != IPPROTO_UDP) {
            m_freem(m_head);
            return (NULL);
        }
        m_head->m_pkthdr.l3hlen = sizeof(*ip6);
    }
#endif
    return (m_head);
}
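
/*
 * Worked example for the fixup above, with the default
 * hn_udpcs_fixup_mtu of 1420: a UDP/IPv4 datagram whose IP total
 * length is 1446 (20B IP + 8B UDP + 1418B payload) and which has
 * IP_DF clear exceeds the threshold (1460 > 1420 + 14), so its
 * checksum is computed in software via in_cksum_skip() and
 * CSUM_IP_UDP is cleared; the same datagram with IP_DF set would
 * still be offloaded to the host.
 */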

/*
 * NOTE: If this function fails, the m_head will have been freed.
 */
static __inline struct mbuf *
hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn)
{
    const struct tcphdr *th;
    int ehlen, iphlen;

    *tcpsyn = 0;
    ehlen = m_head->m_pkthdr.l2hlen;
    iphlen = m_head->m_pkthdr.l3hlen;

    PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
    th = mtodo(m_head, ehlen + iphlen);
    if (th->th_flags & TH_SYN)
        *tcpsyn = 1;
    return (m_head);
}

#undef PULLUP_HDR

#endif	/* INET6 || INET */

static int
hn_set_rxfilter(struct hn_softc *sc, uint32_t filter)
{
    int error = 0;

    HN_LOCK_ASSERT(sc);

    if (sc->hn_rx_filter != filter) {
        error = hn_rndis_set_rxfilter(sc, filter);
        if (!error)
            sc->hn_rx_filter = filter;
    }
    return (error);
}

static int
hn_rxfilter_config(struct hn_softc *sc)
{
    struct ifnet *ifp = sc->hn_ifp;
    uint32_t filter;

    HN_LOCK_ASSERT(sc);

    /*
     * If the non-transparent mode VF is activated, we don't know how
     * its RX filter is configured, so stick the synthetic device in
     * promiscuous mode.
     */
    if ((ifp->if_flags & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF)) {
        filter = NDIS_PACKET_TYPE_PROMISCUOUS;
    } else {
        filter = NDIS_PACKET_TYPE_DIRECTED;
        if (ifp->if_flags & IFF_BROADCAST)
            filter |= NDIS_PACKET_TYPE_BROADCAST;
        /* TODO: support multicast list */
        if ((ifp->if_flags & IFF_ALLMULTI) ||
            !CK_STAILQ_EMPTY(&ifp->if_multiaddrs))
            filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
    }
    return (hn_set_rxfilter(sc, filter));
}

static void
hn_set_txagg(struct hn_softc *sc)
{
    uint32_t size, pkts;
    int i;

    /*
     * Setup aggregation size.
     */
    if (sc->hn_agg_size < 0)
        size = UINT32_MAX;
    else
        size = sc->hn_agg_size;

    if (sc->hn_rndis_agg_size < size)
        size = sc->hn_rndis_agg_size;

    /* NOTE: We only aggregate packets using chimney sending buffers. */
    if (size > (uint32_t)sc->hn_chim_szmax)
        size = sc->hn_chim_szmax;

    if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) {
        /* Disable */
        size = 0;
        pkts = 0;
        goto done;
    }

    /* NOTE: Type of the per TX ring setting is 'int'. */
    if (size > INT_MAX)
        size = INT_MAX;

    /*
     * Setup aggregation packet count.
     */
    if (sc->hn_agg_pkts < 0)
        pkts = UINT32_MAX;
    else
        pkts = sc->hn_agg_pkts;

    if (sc->hn_rndis_agg_pkts < pkts)
        pkts = sc->hn_rndis_agg_pkts;

    if (pkts <= 1) {
        /* Disable */
        size = 0;
        pkts = 0;
        goto done;
    }

    /* NOTE: Type of the per TX ring setting is 'short'. */
    if (pkts > SHRT_MAX)
        pkts = SHRT_MAX;

done:
    /* NOTE: Type of the per TX ring setting is 'short'. */
    if (sc->hn_rndis_agg_align > SHRT_MAX) {
        /* Disable */
        size = 0;
        pkts = 0;
    }

    if (bootverbose) {
        if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n",
            size, pkts, sc->hn_rndis_agg_align);
    }

    for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
        struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

        mtx_lock(&txr->hn_tx_lock);
        txr->hn_agg_szmax = size;
        txr->hn_agg_pktmax = pkts;
        txr->hn_agg_align = sc->hn_rndis_agg_align;
        mtx_unlock(&txr->hn_tx_lock);
    }
}
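
/*
 * To summarize hn_set_txagg(): the effective size limit is
 * min(administrative hn_agg_size, device hn_rndis_agg_size,
 * hn_chim_szmax), and aggregation is disabled outright when that
 * limit cannot hold at least two minimum-sized packets, when the
 * packet count limit is <= 1, or when the device's alignment does
 * not fit the per-ring 'short' fields.
 */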

static int
hn_get_txswq_depth(const struct hn_tx_ring *txr)
{

    KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
    if (hn_tx_swq_depth < txr->hn_txdesc_cnt)
        return txr->hn_txdesc_cnt;
    return hn_tx_swq_depth;
}

static int
hn_rss_reconfig(struct hn_softc *sc)
{
    int error;

    HN_LOCK_ASSERT(sc);

    if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
        return (ENXIO);

    /*
     * Disable RSS first.
     *
     * NOTE:
     * Direct reconfiguration by setting the UNCHG flags does
     * _not_ work properly.
     */
    if (bootverbose)
        if_printf(sc->hn_ifp, "disable RSS\n");
    error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE);
    if (error) {
        if_printf(sc->hn_ifp, "RSS disable failed\n");
        return (error);
    }

    /*
     * Reenable the RSS w/ the updated RSS key or indirect
     * table.
     */
    if (bootverbose)
        if_printf(sc->hn_ifp, "reconfig RSS\n");
    error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
    if (error) {
        if_printf(sc->hn_ifp, "RSS reconfig failed\n");
        return (error);
    }
    return (0);
}

static void
hn_rss_ind_fixup(struct hn_softc *sc)
{
    struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
    int i, nchan;

    nchan = sc->hn_rx_ring_inuse;
    KASSERT(nchan > 1, ("invalid # of channels %d", nchan));

    /*
     * Check indirect table to make sure that all channels in it
     * can be used.
     */
    for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
        if (rss->rss_ind[i] >= nchan) {
            if_printf(sc->hn_ifp,
                "RSS indirect table %d fixup: %u -> %d\n",
                i, rss->rss_ind[i], nchan - 1);
            rss->rss_ind[i] = nchan - 1;
        }
    }
}

static int
hn_ifmedia_upd(struct ifnet *ifp __unused)
{

    return EOPNOTSUPP;
}

static void
hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
    struct hn_softc *sc = ifp->if_softc;

    ifmr->ifm_status = IFM_AVALID;
    ifmr->ifm_active = IFM_ETHER;

    if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) {
        ifmr->ifm_active |= IFM_NONE;
        return;
    }
    ifmr->ifm_status |= IFM_ACTIVE;
    ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
}
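
/*
 * The synthetic device has no real PHY behind it, so media cannot be
 * changed (EOPNOTSUPP above) and, when the link is up, a fixed
 * 10GbaseT/full-duplex media word is reported; actual throughput is
 * whatever the VMBus channel delivers.
 */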

static void
hn_rxvf_set_task(void *xarg, int pending __unused)
{
    struct hn_rxvf_setarg *arg = xarg;

    arg->rxr->hn_rxvf_ifp = arg->vf_ifp;
}

static void
hn_rxvf_set(struct hn_softc *sc, struct ifnet *vf_ifp)
{
    struct hn_rx_ring *rxr;
    struct hn_rxvf_setarg arg;
    struct task task;
    int i;

    HN_LOCK_ASSERT(sc);

    TASK_INIT(&task, 0, hn_rxvf_set_task, &arg);

    for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
        rxr = &sc->hn_rx_ring[i];

        if (i < sc->hn_rx_ring_inuse) {
            arg.rxr = rxr;
            arg.vf_ifp = vf_ifp;
            vmbus_chan_run_task(rxr->hn_chan, &task);
        } else {
            rxr->hn_rxvf_ifp = vf_ifp;
        }
    }
}

static bool
hn_ismyvf(const struct hn_softc *sc, const struct ifnet *ifp)
{
    const struct ifnet *hn_ifp;

    hn_ifp = sc->hn_ifp;

    if (ifp == hn_ifp)
        return (false);

    if (ifp->if_alloctype != IFT_ETHER)
        return (false);

    /* Ignore lagg/vlan interfaces */
    if (strcmp(ifp->if_dname, "lagg") == 0 ||
        strcmp(ifp->if_dname, "vlan") == 0)
        return (false);

    /*
     * During detach events ifp->if_addr might be NULL.
     * Make sure the bcmp() below doesn't panic on that:
     */
    if (ifp->if_addr == NULL || hn_ifp->if_addr == NULL)
        return (false);

    if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0)
        return (false);

    return (true);
}

static void
hn_rxvf_change(struct hn_softc *sc, struct ifnet *ifp, bool rxvf)
{
    struct ifnet *hn_ifp;

    HN_LOCK(sc);

    if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
        goto out;

    if (!hn_ismyvf(sc, ifp))
        goto out;
    hn_ifp = sc->hn_ifp;

    if (rxvf) {
        if (sc->hn_flags & HN_FLAG_RXVF)
            goto out;

        sc->hn_flags |= HN_FLAG_RXVF;
        hn_rxfilter_config(sc);
    } else {
        if (!(sc->hn_flags & HN_FLAG_RXVF))
            goto out;

        sc->hn_flags &= ~HN_FLAG_RXVF;
        if (hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
            hn_rxfilter_config(sc);
        else
            hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE);
    }

    hn_nvs_set_datapath(sc,
        rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH);

    hn_rxvf_set(sc, rxvf ? ifp : NULL);

    if (rxvf) {
        hn_vf_rss_fixup(sc, true);
        hn_suspend_mgmt(sc);
        sc->hn_link_flags &=
            ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG);
        if_link_state_change(hn_ifp, LINK_STATE_DOWN);
    } else {
        hn_vf_rss_restore(sc);
        hn_resume_mgmt(sc);
    }

    devctl_notify("HYPERV_NIC_VF", hn_ifp->if_xname,
        rxvf ? "VF_UP" : "VF_DOWN", NULL);

    if (bootverbose) {
        if_printf(hn_ifp, "datapath is switched %s %s\n",
            rxvf ? "to" : "from", ifp->if_xname);
    }
out:
    HN_UNLOCK(sc);
}
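
/*
 * hn_rxvf_change() drives the non-transparent ("RXVF") datapath
 * switch: a matching VF coming up flips the NVS datapath to the VF,
 * opens the synthetic RX filter (promiscuous; see hn_rxfilter_config()),
 * points every RX ring's hn_rxvf_ifp at the VF, and suspends link
 * status management; the VF going down undoes each step.  The devctl
 * notification lets devd(8) consumers react to the switch.
 */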

static void
hn_ifnet_event(void *arg, struct ifnet *ifp, int event)
{

    if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN)
        return;
    hn_rxvf_change(arg, ifp, event == IFNET_EVENT_UP);
}

static void
hn_ifaddr_event(void *arg, struct ifnet *ifp)
{

    hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP);
}

static int
hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr)
{
    struct ifnet *ifp, *vf_ifp;
    uint64_t tmp;
    int error;

    HN_LOCK_ASSERT(sc);
    ifp = sc->hn_ifp;
    vf_ifp = sc->hn_vf_ifp;

    /*
     * Fix up requested capabilities w/ supported capabilities,
     * since the supported capabilities could have been changed.
     */
    ifr->ifr_reqcap &= ifp->if_capabilities;
    /* Pass SIOCSIFCAP to VF. */
    error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFCAP, (caddr_t)ifr);

    /*
     * NOTE:
     * The error will be propagated to the callers, however, it
     * is _not_ useful here.
     */

    /*
     * Merge VF's enabled capabilities.
     */
    ifp->if_capenable = vf_ifp->if_capenable & ifp->if_capabilities;

    tmp = vf_ifp->if_hwassist & HN_CSUM_IP_HWASSIST(sc);
    if (ifp->if_capenable & IFCAP_TXCSUM)
        ifp->if_hwassist |= tmp;
    else
        ifp->if_hwassist &= ~tmp;

    tmp = vf_ifp->if_hwassist & HN_CSUM_IP6_HWASSIST(sc);
    if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
        ifp->if_hwassist |= tmp;
    else
        ifp->if_hwassist &= ~tmp;

    tmp = vf_ifp->if_hwassist & CSUM_IP_TSO;
    if (ifp->if_capenable & IFCAP_TSO4)
        ifp->if_hwassist |= tmp;
    else
        ifp->if_hwassist &= ~tmp;

    tmp = vf_ifp->if_hwassist & CSUM_IP6_TSO;
    if (ifp->if_capenable & IFCAP_TSO6)
        ifp->if_hwassist |= tmp;
    else
        ifp->if_hwassist &= ~tmp;

    return (error);
}

static int
hn_xpnt_vf_iocsetflags(struct hn_softc *sc)
{
    struct ifnet *vf_ifp;
    struct ifreq ifr;

    HN_LOCK_ASSERT(sc);
    vf_ifp = sc->hn_vf_ifp;

    memset(&ifr, 0, sizeof(ifr));
    strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
    ifr.ifr_flags = vf_ifp->if_flags & 0xffff;
    ifr.ifr_flagshigh = vf_ifp->if_flags >> 16;
    return (vf_ifp->if_ioctl(vf_ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
}

static void
hn_xpnt_vf_saveifflags(struct hn_softc *sc)
{
    struct ifnet *ifp = sc->hn_ifp;
    int allmulti = 0;

    HN_LOCK_ASSERT(sc);

    /* XXX vlan(4) style mcast addr maintenance */
    if (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs))
        allmulti = IFF_ALLMULTI;

    /* Always set the VF's if_flags */
    sc->hn_vf_ifp->if_flags = ifp->if_flags | allmulti;
}

static void
hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m)
{
    struct rm_priotracker pt;
    struct ifnet *hn_ifp = NULL;
    struct mbuf *mn;

    /*
     * XXX racy, if hn(4) ever detached.
     */
    rm_rlock(&hn_vfmap_lock, &pt);
    if (vf_ifp->if_index < hn_vfmap_size)
        hn_ifp = hn_vfmap[vf_ifp->if_index];
    rm_runlock(&hn_vfmap_lock, &pt);

    if (hn_ifp != NULL) {
        for (mn = m; mn != NULL; mn = mn->m_nextpkt) {
            /*
             * Allow tapping on the VF.
             */
            ETHER_BPF_MTAP(vf_ifp, mn);

            /*
             * Update VF stats.
             */
            if ((vf_ifp->if_capenable & IFCAP_HWSTATS) == 0) {
                if_inc_counter(vf_ifp, IFCOUNTER_IBYTES,
                    mn->m_pkthdr.len);
            }
            /*
             * XXX IFCOUNTER_IMCAST
             * This stat updating is kinda invasive, since it
             * requires two checks on the mbuf: the length check
             * and the ethernet header check.  As of this writing,
             * all multicast packets go directly to hn(4), which
             * makes imcast stat updating in the VF a try in vain.
             */

            /*
             * Fix up rcvif and increase hn(4)'s ipackets.
             */
            mn->m_pkthdr.rcvif = hn_ifp;
            if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1);
        }
        /*
         * Go through hn(4)'s if_input.
         */
        hn_ifp->if_input(hn_ifp, m);
    } else {
        /*
         * In the middle of the transition; free this
         * mbuf chain.
         */
        while (m != NULL) {
            mn = m->m_nextpkt;
            m->m_nextpkt = NULL;
            m_freem(m);
            m = mn;
        }
    }
}
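
/*
 * hn_xpnt_vf_input() is installed as the VF's if_input in transparent
 * mode (see hn_ifnet_attevent()), so every packet the VF receives is
 * re-attributed to the hn(4) interface: rcvif is rewritten and the
 * chain is handed to hn_ifp->if_input.  The global hn_vfmap, guarded
 * by an rmlock because this path runs per packet, translates the VF's
 * if_index back to the owning hn(4) ifnet.
 */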

static void
hn_mtu_change_fixup(struct hn_softc *sc)
{
    struct ifnet *ifp;

    HN_LOCK_ASSERT(sc);
    ifp = sc->hn_ifp;

    hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu);
#if __FreeBSD_version >= 1100099
    if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp))
        hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
#endif
}

static uint32_t
hn_rss_type_fromndis(uint32_t rss_hash)
{
    uint32_t types = 0;

    if (rss_hash & NDIS_HASH_IPV4)
        types |= RSS_TYPE_IPV4;
    if (rss_hash & NDIS_HASH_TCP_IPV4)
        types |= RSS_TYPE_TCP_IPV4;
    if (rss_hash & NDIS_HASH_IPV6)
        types |= RSS_TYPE_IPV6;
    if (rss_hash & NDIS_HASH_IPV6_EX)
        types |= RSS_TYPE_IPV6_EX;
    if (rss_hash & NDIS_HASH_TCP_IPV6)
        types |= RSS_TYPE_TCP_IPV6;
    if (rss_hash & NDIS_HASH_TCP_IPV6_EX)
        types |= RSS_TYPE_TCP_IPV6_EX;
    if (rss_hash & NDIS_HASH_UDP_IPV4_X)
        types |= RSS_TYPE_UDP_IPV4;
    return (types);
}

static uint32_t
hn_rss_type_tondis(uint32_t types)
{
    uint32_t rss_hash = 0;

    KASSERT((types & (RSS_TYPE_UDP_IPV6 | RSS_TYPE_UDP_IPV6_EX)) == 0,
        ("UDP6 and UDP6EX are not supported"));

    if (types & RSS_TYPE_IPV4)
        rss_hash |= NDIS_HASH_IPV4;
    if (types & RSS_TYPE_TCP_IPV4)
        rss_hash |= NDIS_HASH_TCP_IPV4;
    if (types & RSS_TYPE_IPV6)
        rss_hash |= NDIS_HASH_IPV6;
    if (types & RSS_TYPE_IPV6_EX)
        rss_hash |= NDIS_HASH_IPV6_EX;
    if (types & RSS_TYPE_TCP_IPV6)
        rss_hash |= NDIS_HASH_TCP_IPV6;
    if (types & RSS_TYPE_TCP_IPV6_EX)
        rss_hash |= NDIS_HASH_TCP_IPV6_EX;
    if (types & RSS_TYPE_UDP_IPV4)
        rss_hash |= NDIS_HASH_UDP_IPV4_X;
    return (rss_hash);
}
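
/*
 * The two converters above are inverses over the seven hash types the
 * synthetic device knows about.  Note the asymmetry: UDP/IPv4 maps to
 * the NDIS_HASH_UDP_IPV4_X extension bit, while RSS_TYPE_UDP_IPV6 and
 * RSS_TYPE_UDP_IPV6_EX have no NDIS counterpart at all, which is why
 * hn_rss_type_tondis() asserts they are never passed in.
 */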

static void
hn_rss_mbuf_hash(struct hn_softc *sc, uint32_t mbuf_hash)
{
    int i;

    HN_LOCK_ASSERT(sc);

    for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
        sc->hn_rx_ring[i].hn_mbuf_hash = mbuf_hash;
}

static void
hn_vf_rss_fixup(struct hn_softc *sc, bool reconf)
{
    struct ifnet *ifp, *vf_ifp;
    struct ifrsshash ifrh;
    struct ifrsskey ifrk;
    int error;
    uint32_t my_types, diff_types, mbuf_types = 0;

    HN_LOCK_ASSERT(sc);
    KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
        ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname));

    if (sc->hn_rx_ring_inuse == 1) {
        /* No RSS on synthetic parts; done. */
        return;
    }
    if ((sc->hn_rss_hcap & NDIS_HASH_FUNCTION_TOEPLITZ) == 0) {
        /* Synthetic parts do not support Toeplitz; done. */
        return;
    }

    ifp = sc->hn_ifp;
    vf_ifp = sc->hn_vf_ifp;

    /*
     * Extract VF's RSS key.  Only a 40-byte Toeplitz key is
     * supported.
     */
    memset(&ifrk, 0, sizeof(ifrk));
    strlcpy(ifrk.ifrk_name, vf_ifp->if_xname, sizeof(ifrk.ifrk_name));
    error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSKEY, (caddr_t)&ifrk);
    if (error) {
        if_printf(ifp, "%s SIOCGIFRSSKEY failed: %d\n",
            vf_ifp->if_xname, error);
        goto done;
    }
    if (ifrk.ifrk_func != RSS_FUNC_TOEPLITZ) {
        if_printf(ifp, "%s RSS function %u is not Toeplitz\n",
            vf_ifp->if_xname, ifrk.ifrk_func);
        goto done;
    }
    if (ifrk.ifrk_keylen != NDIS_HASH_KEYSIZE_TOEPLITZ) {
        if_printf(ifp, "%s invalid RSS Toeplitz key length %d\n",
            vf_ifp->if_xname, ifrk.ifrk_keylen);
        goto done;
    }

    /*
     * Extract VF's RSS hash.  Only Toeplitz is supported.
     */
    memset(&ifrh, 0, sizeof(ifrh));
    strlcpy(ifrh.ifrh_name, vf_ifp->if_xname, sizeof(ifrh.ifrh_name));
    error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSHASH, (caddr_t)&ifrh);
    if (error) {
        if_printf(ifp, "%s SIOCGIFRSSHASH failed: %d\n",
            vf_ifp->if_xname, error);
        goto done;
    }
    if (ifrh.ifrh_func != RSS_FUNC_TOEPLITZ) {
        if_printf(ifp, "%s RSS function %u is not Toeplitz\n",
            vf_ifp->if_xname, ifrh.ifrh_func);
        goto done;
    }

    my_types = hn_rss_type_fromndis(sc->hn_rss_hcap);
    if ((ifrh.ifrh_types & my_types) == 0) {
        /* This disables RSS; ignore it then */
        if_printf(ifp, "%s intersection of RSS types failed.  "
            "VF %#x, mine %#x\n", vf_ifp->if_xname,
            ifrh.ifrh_types, my_types);
        goto done;
    }

    diff_types = my_types ^ ifrh.ifrh_types;
    my_types &= ifrh.ifrh_types;
    mbuf_types = my_types;

    /*
     * Detect RSS hash value/type conflicts.
     *
     * NOTE:
     * We don't disable the hash type, but stop delivering the hash
     * value/type through mbufs on RX path.
     *
     * XXX If HN_CAP_UDPHASH is set in hn_caps, then UDP 4-tuple
     * hash is delivered with type of TCP_IPV4.  This means if
     * UDP_IPV4 is enabled, then TCP_IPV4 should be forced, at
     * least to hn_mbuf_hash.  However, given that _all_ of the
     * NICs implement TCP_IPV4, this will _not_ impose any issues
     * here.
     */
    if ((my_types & RSS_TYPE_IPV4) &&
        (diff_types & ifrh.ifrh_types &
         (RSS_TYPE_TCP_IPV4 | RSS_TYPE_UDP_IPV4))) {
        /* Conflict; disable IPV4 hash type/value delivery. */
        if_printf(ifp, "disable IPV4 mbuf hash delivery\n");
        mbuf_types &= ~RSS_TYPE_IPV4;
    }
    if ((my_types & RSS_TYPE_IPV6) &&
        (diff_types & ifrh.ifrh_types &
         (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 |
          RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX |
          RSS_TYPE_IPV6_EX))) {
        /* Conflict; disable IPV6 hash type/value delivery. */
        if_printf(ifp, "disable IPV6 mbuf hash delivery\n");
        mbuf_types &= ~RSS_TYPE_IPV6;
    }
    if ((my_types & RSS_TYPE_IPV6_EX) &&
        (diff_types & ifrh.ifrh_types &
         (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 |
          RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX |
          RSS_TYPE_IPV6))) {
        /* Conflict; disable IPV6_EX hash type/value delivery. */
        if_printf(ifp, "disable IPV6_EX mbuf hash delivery\n");
        mbuf_types &= ~RSS_TYPE_IPV6_EX;
    }
    if ((my_types & RSS_TYPE_TCP_IPV6) &&
        (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6_EX)) {
        /* Conflict; disable TCP_IPV6 hash type/value delivery. */
        if_printf(ifp, "disable TCP_IPV6 mbuf hash delivery\n");
        mbuf_types &= ~RSS_TYPE_TCP_IPV6;
    }
    if ((my_types & RSS_TYPE_TCP_IPV6_EX) &&
        (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6)) {
        /* Conflict; disable TCP_IPV6_EX hash type/value delivery. */
        if_printf(ifp, "disable TCP_IPV6_EX mbuf hash delivery\n");
        mbuf_types &= ~RSS_TYPE_TCP_IPV6_EX;
    }
    if ((my_types & RSS_TYPE_UDP_IPV6) &&
        (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6_EX)) {
        /* Conflict; disable UDP_IPV6 hash type/value delivery. */
        if_printf(ifp, "disable UDP_IPV6 mbuf hash delivery\n");
        mbuf_types &= ~RSS_TYPE_UDP_IPV6;
    }
    if ((my_types & RSS_TYPE_UDP_IPV6_EX) &&
        (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6)) {
        /* Conflict; disable UDP_IPV6_EX hash type/value delivery. */
        if_printf(ifp, "disable UDP_IPV6_EX mbuf hash delivery\n");
        mbuf_types &= ~RSS_TYPE_UDP_IPV6_EX;
    }

    /*
     * Indirect table does not matter.
     */

    sc->hn_rss_hash = (sc->hn_rss_hcap & NDIS_HASH_FUNCTION_MASK) |
        hn_rss_type_tondis(my_types);
    memcpy(sc->hn_rss.rss_key, ifrk.ifrk_key, sizeof(sc->hn_rss.rss_key));
    sc->hn_flags |= HN_FLAG_HAS_RSSKEY;

    if (reconf) {
        error = hn_rss_reconfig(sc);
        if (error) {
            /* XXX roll-back? */
            if_printf(ifp, "hn_rss_reconfig failed: %d\n", error);
            /* XXX keep going. */
        }
    }
done:
    /* Hash deliverability for mbufs. */
    hn_rss_mbuf_hash(sc, hn_rss_type_tondis(mbuf_types));
}

static void
hn_vf_rss_restore(struct hn_softc *sc)
{

    HN_LOCK_ASSERT(sc);
    KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
        ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname));

    if (sc->hn_rx_ring_inuse == 1)
        goto done;

    /*
     * Restore hash types.  Key does _not_ matter.
     */
    if (sc->hn_rss_hash != sc->hn_rss_hcap) {
        int error;

        sc->hn_rss_hash = sc->hn_rss_hcap;
        error = hn_rss_reconfig(sc);
        if (error) {
            if_printf(sc->hn_ifp, "hn_rss_reconfig failed: %d\n",
                error);
            /* XXX keep going. */
        }
    }
done:
    /* Hash deliverability for mbufs. */
    hn_rss_mbuf_hash(sc, NDIS_HASH_ALL);
}

static void
hn_xpnt_vf_setready(struct hn_softc *sc)
{
    struct ifnet *ifp, *vf_ifp;
    struct ifreq ifr;

    HN_LOCK_ASSERT(sc);
    ifp = sc->hn_ifp;
    vf_ifp = sc->hn_vf_ifp;

    /*
     * Mark the VF ready.
     */
    sc->hn_vf_rdytick = 0;

    /*
     * Save information for restoration.
     */
    sc->hn_saved_caps = ifp->if_capabilities;
    sc->hn_saved_tsomax = ifp->if_hw_tsomax;
    sc->hn_saved_tsosegcnt = ifp->if_hw_tsomaxsegcount;
    sc->hn_saved_tsosegsz = ifp->if_hw_tsomaxsegsize;

    /*
     * Intersect supported/enabled capabilities.
     *
     * NOTE:
     * if_hwassist is not changed here.
     */
    ifp->if_capabilities &= vf_ifp->if_capabilities;
    ifp->if_capenable &= ifp->if_capabilities;

    /*
     * Fix TSO settings.
     */
    if (ifp->if_hw_tsomax > vf_ifp->if_hw_tsomax)
        ifp->if_hw_tsomax = vf_ifp->if_hw_tsomax;
    if (ifp->if_hw_tsomaxsegcount > vf_ifp->if_hw_tsomaxsegcount)
        ifp->if_hw_tsomaxsegcount = vf_ifp->if_hw_tsomaxsegcount;
    if (ifp->if_hw_tsomaxsegsize > vf_ifp->if_hw_tsomaxsegsize)
        ifp->if_hw_tsomaxsegsize = vf_ifp->if_hw_tsomaxsegsize;

    /*
     * Change VF's enabled capabilities.
     */
    memset(&ifr, 0, sizeof(ifr));
    strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
    ifr.ifr_reqcap = ifp->if_capenable;
    hn_xpnt_vf_iocsetcaps(sc, &ifr);

    if (ifp->if_mtu != ETHERMTU) {
        int error;

        /*
         * Change VF's MTU.
         */
        memset(&ifr, 0, sizeof(ifr));
        strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
        ifr.ifr_mtu = ifp->if_mtu;
        error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr);
        if (error) {
            if_printf(ifp, "%s SIOCSIFMTU %u failed\n",
                vf_ifp->if_xname, ifp->if_mtu);
            if (ifp->if_mtu > ETHERMTU) {
                if_printf(ifp, "change MTU to %d\n", ETHERMTU);

                /*
                 * XXX
                 * No need to adjust the synthetic parts' MTU;
                 * failure of the adjustment will cause us
                 * infinite headache.
                 */
                ifp->if_mtu = ETHERMTU;
                hn_mtu_change_fixup(sc);
            }
        }
    }
}

static bool
hn_xpnt_vf_isready(struct hn_softc *sc)
{

    HN_LOCK_ASSERT(sc);

    if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL)
        return (false);

    if (sc->hn_vf_rdytick == 0)
        return (true);

    if (sc->hn_vf_rdytick > ticks)
        return (false);

    /* Mark VF as ready. */
    hn_xpnt_vf_setready(sc);
    return (true);
}
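
/*
 * hn_vf_rdytick encodes the transparent-VF readiness state: 0 means
 * the VF has been marked ready (hn_xpnt_vf_setready() ran), while a
 * non-zero value is the 'ticks' deadline after which the VF's attach
 * routine is assumed to have settled; hn_xpnt_vf_isready() promotes
 * the VF to ready lazily once that deadline passes.
 */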

static void
hn_xpnt_vf_setenable(struct hn_softc *sc)
{
    int i;

    HN_LOCK_ASSERT(sc);

    /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
    rm_wlock(&sc->hn_vf_lock);
    sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED;
    rm_wunlock(&sc->hn_vf_lock);

    for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
        sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_XPNT_VF;
}

static void
hn_xpnt_vf_setdisable(struct hn_softc *sc, bool clear_vf)
{
    int i;

    HN_LOCK_ASSERT(sc);

    /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
    rm_wlock(&sc->hn_vf_lock);
    sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED;
    if (clear_vf)
        sc->hn_vf_ifp = NULL;
    rm_wunlock(&sc->hn_vf_lock);

    for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
        sc->hn_rx_ring[i].hn_rx_flags &= ~HN_RX_FLAG_XPNT_VF;
}

static void
hn_xpnt_vf_init(struct hn_softc *sc)
{
    int error;

    HN_LOCK_ASSERT(sc);

    KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0,
        ("%s: transparent VF was enabled", sc->hn_ifp->if_xname));

    if (bootverbose) {
        if_printf(sc->hn_ifp, "try bringing up %s\n",
            sc->hn_vf_ifp->if_xname);
    }

    /*
     * Bring the VF up.
     */
    hn_xpnt_vf_saveifflags(sc);
    sc->hn_vf_ifp->if_flags |= IFF_UP;
    error = hn_xpnt_vf_iocsetflags(sc);
    if (error) {
        if_printf(sc->hn_ifp, "bringing up %s failed: %d\n",
            sc->hn_vf_ifp->if_xname, error);
        return;
    }

    /*
     * NOTE:
     * Datapath setting must happen _after_ bringing the VF up.
     */
    hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF);

    /*
     * NOTE:
     * Fix up RSS related bits _after_ the VF is brought up, since
     * many VFs generate the RSS key during their initialization.
     */
    hn_vf_rss_fixup(sc, true);

    /* Mark transparent mode VF as enabled. */
    hn_xpnt_vf_setenable(sc);
}
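
/*
 * Order matters throughout hn_xpnt_vf_init(): mirror hn(4)'s if_flags
 * onto the VF, bring the VF up via SIOCSIFFLAGS, only then switch the
 * NVS datapath to the VF, fix up RSS from the now-initialized VF, and
 * finally flip HN_XVFFLAG_ENABLED so the transmit path (see the
 * hn_vf_lock notes above) starts using the VF.
 */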

static void
hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused)
{
    struct hn_softc *sc = xsc;

    HN_LOCK(sc);

    if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
        goto done;
    if (sc->hn_vf_ifp == NULL)
        goto done;
    if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
        goto done;

    if (sc->hn_vf_rdytick != 0) {
        /* Mark VF as ready. */
        hn_xpnt_vf_setready(sc);
    }

    if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) {
        /*
         * Delayed VF initialization.
         */
        if (bootverbose) {
            if_printf(sc->hn_ifp, "delayed initialize %s\n",
                sc->hn_vf_ifp->if_xname);
        }
        hn_xpnt_vf_init(sc);
    }
done:
    HN_UNLOCK(sc);
}

static void
hn_ifnet_attevent(void *xsc, struct ifnet *ifp)
{
    struct hn_softc *sc = xsc;

    HN_LOCK(sc);

    if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
        goto done;

    if (!hn_ismyvf(sc, ifp))
        goto done;

    if (sc->hn_vf_ifp != NULL) {
        if_printf(sc->hn_ifp, "%s was attached as VF\n",
            sc->hn_vf_ifp->if_xname);
        goto done;
    }

    if (hn_xpnt_vf && ifp->if_start != NULL) {
        /*
         * ifnet.if_start is _not_ supported by transparent
         * mode VF; mainly due to the IFF_DRV_OACTIVE flag.
         */
        if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported "
            "in transparent VF mode.\n", ifp->if_xname);
        goto done;
    }

    rm_wlock(&hn_vfmap_lock);

    if (ifp->if_index >= hn_vfmap_size) {
        struct ifnet **newmap;
        int newsize;

        newsize = ifp->if_index + HN_VFMAP_SIZE_DEF;
        newmap = malloc(sizeof(struct ifnet *) * newsize, M_DEVBUF,
            M_WAITOK | M_ZERO);

        memcpy(newmap, hn_vfmap,
            sizeof(struct ifnet *) * hn_vfmap_size);
        free(hn_vfmap, M_DEVBUF);
        hn_vfmap = newmap;
        hn_vfmap_size = newsize;
    }
    KASSERT(hn_vfmap[ifp->if_index] == NULL,
        ("%s: ifindex %d was mapped to %s",
         ifp->if_xname, ifp->if_index, hn_vfmap[ifp->if_index]->if_xname));
    hn_vfmap[ifp->if_index] = sc->hn_ifp;

    rm_wunlock(&hn_vfmap_lock);

    /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
    rm_wlock(&sc->hn_vf_lock);
    KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0,
        ("%s: transparent VF was enabled", sc->hn_ifp->if_xname));
    sc->hn_vf_ifp = ifp;
    rm_wunlock(&sc->hn_vf_lock);

    if (hn_xpnt_vf) {
        int wait_ticks;

        /*
         * Install if_input for vf_ifp, which does vf_ifp -> hn_ifp.
         * Save vf_ifp's current if_input for later restoration.
         */
        sc->hn_vf_input = ifp->if_input;
        ifp->if_input = hn_xpnt_vf_input;

        /*
         * Stop link status management; use the VF's.
         */
        hn_suspend_mgmt(sc);

        /*
         * Give the VF some time to complete its attach routine.
         */
        wait_ticks = hn_xpnt_vf_attwait * hz;
        sc->hn_vf_rdytick = ticks + wait_ticks;

        taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init,
            wait_ticks);
    }
done:
    HN_UNLOCK(sc);
}
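
/*
 * hn_vfmap grows on demand in HN_VFMAP_SIZE_DEF (8) increments, sized
 * by the largest VF if_index seen so far, and is never shrunk; the
 * rm write lock above covers the copy-and-swap so that concurrent
 * hn_xpnt_vf_input() readers always see a consistent map.
 */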
		 *
		 * NOTE:
		 * - This lock _must_ be released, since the hn_vf_init task
		 *   will try holding this lock.
		 * - It is safe to release this lock here, since the
		 *   hn_ifnet_attevent() is interlocked by the hn_vf_ifp.
		 *
		 * XXX racy, if hn(4) ever detached.
		 */
		HN_UNLOCK(sc);
		taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init);
		HN_LOCK(sc);

		KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved",
		    sc->hn_ifp->if_xname));
		ifp->if_input = sc->hn_vf_input;
		sc->hn_vf_input = NULL;

		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) &&
		    (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED))
			hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH);

		if (sc->hn_vf_rdytick == 0) {
			/*
			 * The VF was ready; restore some settings.
			 */
			sc->hn_ifp->if_capabilities = sc->hn_saved_caps;
			/*
			 * NOTE:
			 * There is _no_ need to fixup if_capenable and
			 * if_hwassist, since the if_capabilities before
			 * restoration was an intersection of the VF's
			 * if_capabilities and the synthetic device's
			 * if_capabilities.
			 */
			sc->hn_ifp->if_hw_tsomax = sc->hn_saved_tsomax;
			sc->hn_ifp->if_hw_tsomaxsegcount =
			    sc->hn_saved_tsosegcnt;
			sc->hn_ifp->if_hw_tsomaxsegsize = sc->hn_saved_tsosegsz;
		}

		if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
			/*
			 * Restore RSS settings.
			 */
			hn_vf_rss_restore(sc);

			/*
			 * Resume link status management, which was suspended
			 * by hn_ifnet_attevent().
			 */
			hn_resume_mgmt(sc);
		}
	}

	/* Mark transparent mode VF as disabled. */
	hn_xpnt_vf_setdisable(sc, true /* clear hn_vf_ifp */);

	rm_wlock(&hn_vfmap_lock);

	KASSERT(ifp->if_index < hn_vfmap_size,
	    ("ifindex %d, vfmapsize %d", ifp->if_index, hn_vfmap_size));
	if (hn_vfmap[ifp->if_index] != NULL) {
		KASSERT(hn_vfmap[ifp->if_index] == sc->hn_ifp,
		    ("%s: ifindex %d was mapped to %s",
		     ifp->if_xname, ifp->if_index,
		     hn_vfmap[ifp->if_index]->if_xname));
		hn_vfmap[ifp->if_index] = NULL;
	}

	rm_wunlock(&hn_vfmap_lock);
done:
	HN_UNLOCK(sc);
}

static void
hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int link_state)
{
	struct hn_softc *sc = xsc;

	if (sc->hn_vf_ifp == ifp)
		if_link_state_change(sc->hn_ifp, link_state);
}

static int
hn_probe(device_t dev)
{

	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &hn_guid) == 0) {
		device_set_desc(dev, "Hyper-V Network Interface");
		return BUS_PROBE_DEFAULT;
	}
	return ENXIO;
}

static int
hn_attach(device_t dev)
{
	struct hn_softc *sc = device_get_softc(dev);
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;
	uint8_t eaddr[ETHER_ADDR_LEN];
	struct ifnet *ifp = NULL;
	int error, ring_cnt, tx_ring_cnt;
	uint32_t mtu;

	sc->hn_dev = dev;
	sc->hn_prichan = vmbus_get_channel(dev);
	HN_LOCK_INIT(sc);
	rm_init(&sc->hn_vf_lock, "hnvf");
	if (hn_xpnt_vf && hn_xpnt_vf_accbpf)
		sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF;

	/*
	 * Initialize these tunables once.
	 */
	sc->hn_agg_size = hn_tx_agg_size;
	sc->hn_agg_pkts = hn_tx_agg_pkts;

	/*
	 * Setup taskqueue for transmission.
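	 *
	 * hn_tx_taskq_mode selects the scheduling model handled below:
	 *
	 *	HN_TX_TASKQ_M_INDEP	per-device "hn_tx" taskqueues
	 *	HN_TX_TASKQ_M_GLOBAL	share the module-wide hn_tx_taskque
	 *	anything else		sc->hn_tx_taskqs stays NULL and TX
	 *				work is scheduled elsewhere (e.g. on
	 *				the channel event taskqueues)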
	 */
	if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) {
		int i;

		sc->hn_tx_taskqs =
		    malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
		    M_DEVBUF, M_WAITOK);
		for (i = 0; i < hn_tx_taskq_cnt; ++i) {
			sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx",
			    M_WAITOK, taskqueue_thread_enqueue,
			    &sc->hn_tx_taskqs[i]);
			taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET,
			    "%s tx%d", device_get_nameunit(dev), i);
		}
	} else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) {
		sc->hn_tx_taskqs = hn_tx_taskque;
	}

	/*
	 * Setup taskqueue for management tasks, e.g. link status.
	 */
	sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0);
	taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc);
	TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc);
	TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
	    hn_netchg_status_taskfunc, sc);

	if (hn_xpnt_vf) {
		/*
		 * Setup taskqueue for VF tasks, e.g. delayed VF bringing up.
		 */
		sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK,
		    taskqueue_thread_enqueue, &sc->hn_vf_taskq);
		taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf",
		    device_get_nameunit(dev));
		TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0,
		    hn_xpnt_vf_init_taskfunc, sc);
	}

	/*
	 * Allocate ifnet and setup its name earlier, so that if_printf
	 * can be used by functions which will be called after
	 * ether_ifattach().
	 */
	ifp = sc->hn_ifp = if_alloc(IFT_ETHER);
	ifp->if_softc = sc;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	/*
	 * Initialize ifmedia earlier so that it can be unconditionally
	 * destroyed if an error happens later on.
	 */
	ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);

	/*
	 * Figure out the # of RX rings (ring_cnt) and the # of TX rings
	 * to use (tx_ring_cnt).
	 *
	 * NOTE:
	 * The # of RX rings to use is the same as the # of channels to use.
	 */
	ring_cnt = hn_chan_cnt;
	if (ring_cnt <= 0) {
		/* Default */
		ring_cnt = mp_ncpus;
		if (ring_cnt > HN_RING_CNT_DEF_MAX)
			ring_cnt = HN_RING_CNT_DEF_MAX;
	} else if (ring_cnt > mp_ncpus) {
		ring_cnt = mp_ncpus;
	}
#ifdef RSS
	if (ring_cnt > rss_getnumbuckets())
		ring_cnt = rss_getnumbuckets();
#endif

	tx_ring_cnt = hn_tx_ring_cnt;
	if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
		tx_ring_cnt = ring_cnt;
#ifdef HN_IFSTART_SUPPORT
	if (hn_use_if_start) {
		/* ifnet.if_start only needs one TX ring. */
		tx_ring_cnt = 1;
	}
#endif

	/*
	 * Set the leader CPU for channels.
	 */
	sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;

	/*
	 * Create enough TX/RX rings, even if only a limited number of
	 * channels can be allocated.
	 */
	error = hn_create_tx_data(sc, tx_ring_cnt);
	if (error)
		goto failed;
	error = hn_create_rx_data(sc, ring_cnt);
	if (error)
		goto failed;

	/*
	 * Create transaction context for NVS and RNDIS transactions.
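	 *
	 * The xact context is the request/response vehicle for all
	 * NVS/RNDIS control operations.  Its lifecycle, in sketch form:
	 *
	 *	sc->hn_xact = vmbus_xact_ctx_create(...);
	 *	vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact);
	 *	... control path issues transactions on sc->hn_xact ...
	 *	vmbus_chan_unset_orphan(sc->hn_prichan);	(hn_detach())
	 *	vmbus_xact_ctx_destroy(sc->hn_xact);
	 *
	 * The orphan handler lets a pending transaction abort cleanly,
	 * if the host revokes the primary channel mid-request.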
2226 */ 2227 sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), 2228 HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); 2229 if (sc->hn_xact == NULL) { 2230 error = ENXIO; 2231 goto failed; 2232 } 2233 2234 /* 2235 * Install orphan handler for the revocation of this device's 2236 * primary channel. 2237 * 2238 * NOTE: 2239 * The processing order is critical here: 2240 * Install the orphan handler, _before_ testing whether this 2241 * device's primary channel has been revoked or not. 2242 */ 2243 vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact); 2244 if (vmbus_chan_is_revoked(sc->hn_prichan)) { 2245 error = ENXIO; 2246 goto failed; 2247 } 2248 2249 /* 2250 * Attach the synthetic parts, i.e. NVS and RNDIS. 2251 */ 2252 error = hn_synth_attach(sc, ETHERMTU); 2253 if (error) 2254 goto failed; 2255 2256 error = hn_rndis_get_eaddr(sc, eaddr); 2257 if (error) 2258 goto failed; 2259 2260 error = hn_rndis_get_mtu(sc, &mtu); 2261 if (error) 2262 mtu = ETHERMTU; 2263 else if (bootverbose) 2264 device_printf(dev, "RNDIS mtu %u\n", mtu); 2265 2266#if __FreeBSD_version >= 1100099 2267 if (sc->hn_rx_ring_inuse > 1) { 2268 /* 2269 * Reduce TCP segment aggregation limit for multiple 2270 * RX rings to increase ACK timeliness. 2271 */ 2272 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); 2273 } 2274#endif 2275 2276 /* 2277 * Fixup TX/RX stuffs after synthetic parts are attached. 2278 */ 2279 hn_fixup_tx_data(sc); 2280 hn_fixup_rx_data(sc); 2281 2282 ctx = device_get_sysctl_ctx(dev); 2283 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 2284 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, 2285 &sc->hn_nvs_ver, 0, "NVS version"); 2286 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", 2287 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2288 hn_ndis_version_sysctl, "A", "NDIS version"); 2289 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps", 2290 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2291 hn_caps_sysctl, "A", "capabilities"); 2292 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", 2293 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2294 hn_hwassist_sysctl, "A", "hwassist"); 2295 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_max", 2296 CTLFLAG_RD, &ifp->if_hw_tsomax, 0, "max TSO size"); 2297 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegcnt", 2298 CTLFLAG_RD, &ifp->if_hw_tsomaxsegcount, 0, 2299 "max # of TSO segments"); 2300 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegsz", 2301 CTLFLAG_RD, &ifp->if_hw_tsomaxsegsize, 0, 2302 "max size of TSO segment"); 2303 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", 2304 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2305 hn_rxfilter_sysctl, "A", "rxfilter"); 2306 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash", 2307 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2308 hn_rss_hash_sysctl, "A", "RSS hash"); 2309 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hashcap", 2310 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2311 hn_rss_hcap_sysctl, "A", "RSS hash capabilities"); 2312 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "mbuf_hash", 2313 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2314 hn_rss_mbuf_sysctl, "A", "RSS hash for mbufs"); 2315 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size", 2316 CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count"); 2317#ifndef RSS 2318 /* 2319 * Don't allow RSS key/indirect table changes, if RSS is defined. 
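	 *
	 * Without the kernel RSS option both nodes below are writable
	 * and live under the per-device tree, e.g. (assuming unit 0):
	 *
	 *	sysctl -x dev.hn.0.rss_key	dump the 40-byte Toeplitz key
	 *	sysctl -x dev.hn.0.rss_ind	dump the indirect table
	 *
	 * With "options RSS" the kernel owns the key and the table, so
	 * the nodes are not created at all.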
2320 */ 2321 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key", 2322 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2323 hn_rss_key_sysctl, "IU", "RSS key"); 2324 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind", 2325 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2326 hn_rss_ind_sysctl, "IU", "RSS indirect table"); 2327#endif 2328 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size", 2329 CTLFLAG_RD, &sc->hn_rndis_agg_size, 0, 2330 "RNDIS offered packet transmission aggregation size limit"); 2331 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts", 2332 CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0, 2333 "RNDIS offered packet transmission aggregation count limit"); 2334 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align", 2335 CTLFLAG_RD, &sc->hn_rndis_agg_align, 0, 2336 "RNDIS packet transmission aggregation alignment"); 2337 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size", 2338 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2339 hn_txagg_size_sysctl, "I", 2340 "Packet transmission aggregation size, 0 -- disable, -1 -- auto"); 2341 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts", 2342 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2343 hn_txagg_pkts_sysctl, "I", 2344 "Packet transmission aggregation packets, " 2345 "0 -- disable, -1 -- auto"); 2346 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling", 2347 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2348 hn_polling_sysctl, "I", 2349 "Polling frequency: [100,1000000], 0 disable polling"); 2350 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf", 2351 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2352 hn_vf_sysctl, "A", "Virtual Function's name"); 2353 if (!hn_xpnt_vf) { 2354 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf", 2355 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2356 hn_rxvf_sysctl, "A", "activated Virtual Function's name"); 2357 } else { 2358 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled", 2359 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2360 hn_xpnt_vf_enabled_sysctl, "I", 2361 "Transparent VF enabled"); 2362 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf", 2363 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2364 hn_xpnt_vf_accbpf_sysctl, "I", 2365 "Accurate BPF for transparent VF"); 2366 } 2367 2368 /* 2369 * Setup the ifmedia, which has been initialized earlier. 2370 */ 2371 ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); 2372 ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); 2373 /* XXX ifmedia_set really should do this for us */ 2374 sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; 2375 2376 /* 2377 * Setup the ifnet for this interface. 2378 */ 2379 2380 ifp->if_baudrate = IF_Gbps(10); 2381 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 2382 ifp->if_ioctl = hn_ioctl; 2383 ifp->if_init = hn_init; 2384#ifdef HN_IFSTART_SUPPORT 2385 if (hn_use_if_start) { 2386 int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); 2387 2388 ifp->if_start = hn_start; 2389 IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); 2390 ifp->if_snd.ifq_drv_maxlen = qdepth - 1; 2391 IFQ_SET_READY(&ifp->if_snd); 2392 } else 2393#endif 2394 { 2395 ifp->if_transmit = hn_transmit; 2396 ifp->if_qflush = hn_xmit_qflush; 2397 } 2398 2399 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE; 2400#ifdef foo 2401 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 2402 ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; 2403#endif 2404 if (sc->hn_caps & HN_CAP_VLAN) { 2405 /* XXX not sure about VLAN_MTU. 
*/ 2406 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; 2407 } 2408 2409 ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist; 2410 if (ifp->if_hwassist & HN_CSUM_IP_MASK) 2411 ifp->if_capabilities |= IFCAP_TXCSUM; 2412 if (ifp->if_hwassist & HN_CSUM_IP6_MASK) 2413 ifp->if_capabilities |= IFCAP_TXCSUM_IPV6; 2414 if (sc->hn_caps & HN_CAP_TSO4) { 2415 ifp->if_capabilities |= IFCAP_TSO4; 2416 ifp->if_hwassist |= CSUM_IP_TSO; 2417 } 2418 if (sc->hn_caps & HN_CAP_TSO6) { 2419 ifp->if_capabilities |= IFCAP_TSO6; 2420 ifp->if_hwassist |= CSUM_IP6_TSO; 2421 } 2422 2423 /* Enable all available capabilities by default. */ 2424 ifp->if_capenable = ifp->if_capabilities; 2425 2426 /* 2427 * Disable IPv6 TSO and TXCSUM by default, they still can 2428 * be enabled through SIOCSIFCAP. 2429 */ 2430 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); 2431 ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO); 2432 2433 if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) { 2434 /* 2435 * Lock hn_set_tso_maxsize() to simplify its 2436 * internal logic. 2437 */ 2438 HN_LOCK(sc); 2439 hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU); 2440 HN_UNLOCK(sc); 2441 ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; 2442 ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 2443 } 2444 2445 ether_ifattach(ifp, eaddr); 2446 2447 if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) { 2448 if_printf(ifp, "TSO segcnt %u segsz %u\n", 2449 ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); 2450 } 2451 if (mtu < ETHERMTU) { 2452 if_printf(ifp, "fixup mtu %u -> %u\n", ifp->if_mtu, mtu); 2453 ifp->if_mtu = mtu; 2454 } 2455 2456 /* Inform the upper layer about the long frame support. */ 2457 ifp->if_hdrlen = sizeof(struct ether_vlan_header); 2458 2459 /* 2460 * Kick off link status check. 2461 */ 2462 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 2463 hn_update_link_status(sc); 2464 2465 if (!hn_xpnt_vf) { 2466 sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event, 2467 hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY); 2468 sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event, 2469 hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY); 2470 } else { 2471 sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event, 2472 hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY); 2473 } 2474 2475 /* 2476 * NOTE: 2477 * Subscribe ether_ifattach event, instead of ifnet_arrival event, 2478 * since interface's LLADDR is needed; interface LLADDR is not 2479 * available when ifnet_arrival event is triggered. 2480 */ 2481 sc->hn_ifnet_atthand = EVENTHANDLER_REGISTER(ether_ifattach_event, 2482 hn_ifnet_attevent, sc, EVENTHANDLER_PRI_ANY); 2483 sc->hn_ifnet_dethand = EVENTHANDLER_REGISTER(ifnet_departure_event, 2484 hn_ifnet_detevent, sc, EVENTHANDLER_PRI_ANY); 2485 2486 return (0); 2487failed: 2488 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) 2489 hn_synth_detach(sc); 2490 hn_detach(dev); 2491 return (error); 2492} 2493 2494static int 2495hn_detach(device_t dev) 2496{ 2497 struct hn_softc *sc = device_get_softc(dev); 2498 struct ifnet *ifp = sc->hn_ifp, *vf_ifp; 2499 2500 if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) { 2501 /* 2502 * In case that the vmbus missed the orphan handler 2503 * installation. 
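		 *
		 * The teardown below mirrors hn_attach() in reverse:
		 * deregister the event handlers first (so no new VF can
		 * latch on), synthesize a departure event for a
		 * still-attached VF, stop the interface and detach the
		 * synthetic parts, and only then release the rings,
		 * taskqueues and the xact context.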
2504 */ 2505 vmbus_xact_ctx_orphan(sc->hn_xact); 2506 } 2507 2508 if (sc->hn_ifaddr_evthand != NULL) 2509 EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand); 2510 if (sc->hn_ifnet_evthand != NULL) 2511 EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand); 2512 if (sc->hn_ifnet_atthand != NULL) { 2513 EVENTHANDLER_DEREGISTER(ether_ifattach_event, 2514 sc->hn_ifnet_atthand); 2515 } 2516 if (sc->hn_ifnet_dethand != NULL) { 2517 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 2518 sc->hn_ifnet_dethand); 2519 } 2520 if (sc->hn_ifnet_lnkhand != NULL) 2521 EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand); 2522 2523 vf_ifp = sc->hn_vf_ifp; 2524 __compiler_membar(); 2525 if (vf_ifp != NULL) 2526 hn_ifnet_detevent(sc, vf_ifp); 2527 2528 if (device_is_attached(dev)) { 2529 HN_LOCK(sc); 2530 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 2531 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2532 hn_stop(sc, true); 2533 /* 2534 * NOTE: 2535 * hn_stop() only suspends data, so managment 2536 * stuffs have to be suspended manually here. 2537 */ 2538 hn_suspend_mgmt(sc); 2539 hn_synth_detach(sc); 2540 } 2541 HN_UNLOCK(sc); 2542 ether_ifdetach(ifp); 2543 } 2544 2545 ifmedia_removeall(&sc->hn_media); 2546 hn_destroy_rx_data(sc); 2547 hn_destroy_tx_data(sc); 2548 2549 if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) { 2550 int i; 2551 2552 for (i = 0; i < hn_tx_taskq_cnt; ++i) 2553 taskqueue_free(sc->hn_tx_taskqs[i]); 2554 free(sc->hn_tx_taskqs, M_DEVBUF); 2555 } 2556 taskqueue_free(sc->hn_mgmt_taskq0); 2557 if (sc->hn_vf_taskq != NULL) 2558 taskqueue_free(sc->hn_vf_taskq); 2559 2560 if (sc->hn_xact != NULL) { 2561 /* 2562 * Uninstall the orphan handler _before_ the xact is 2563 * destructed. 2564 */ 2565 vmbus_chan_unset_orphan(sc->hn_prichan); 2566 vmbus_xact_ctx_destroy(sc->hn_xact); 2567 } 2568 2569 if_free(ifp); 2570 2571 HN_LOCK_DESTROY(sc); 2572 rm_destroy(&sc->hn_vf_lock); 2573 return (0); 2574} 2575 2576static int 2577hn_shutdown(device_t dev) 2578{ 2579 2580 return (0); 2581} 2582 2583static void 2584hn_link_status(struct hn_softc *sc) 2585{ 2586 uint32_t link_status; 2587 int error; 2588 2589 error = hn_rndis_get_linkstatus(sc, &link_status); 2590 if (error) { 2591 /* XXX what to do? */ 2592 return; 2593 } 2594 2595 if (link_status == NDIS_MEDIA_STATE_CONNECTED) 2596 sc->hn_link_flags |= HN_LINK_FLAG_LINKUP; 2597 else 2598 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2599 if_link_state_change(sc->hn_ifp, 2600 (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ? 2601 LINK_STATE_UP : LINK_STATE_DOWN); 2602} 2603 2604static void 2605hn_link_taskfunc(void *xsc, int pending __unused) 2606{ 2607 struct hn_softc *sc = xsc; 2608 2609 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 2610 return; 2611 hn_link_status(sc); 2612} 2613 2614static void 2615hn_netchg_init_taskfunc(void *xsc, int pending __unused) 2616{ 2617 struct hn_softc *sc = xsc; 2618 2619 /* Prevent any link status checks from running. */ 2620 sc->hn_link_flags |= HN_LINK_FLAG_NETCHG; 2621 2622 /* 2623 * Fake up a [link down --> link up] state change; 5 seconds 2624 * delay is used, which closely simulates miibus reaction 2625 * upon link down event. 2626 */ 2627 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2628 if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN); 2629 taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0, 2630 &sc->hn_netchg_status, 5 * hz); 2631} 2632 2633static void 2634hn_netchg_status_taskfunc(void *xsc, int pending __unused) 2635{ 2636 struct hn_softc *sc = xsc; 2637 2638 /* Re-allow link status checks. 
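	 *
	 * This is the tail of the network-change dance:
	 *
	 *	hn_change_network()
	 *	  -> hn_netchg_init_taskfunc()	  link DOWN, block checks
	 *	  ... 5 * hz ...
	 *	  -> hn_netchg_status_taskfunc()  unblock, re-query link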
*/ 2639 sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG; 2640 hn_link_status(sc); 2641} 2642 2643static void 2644hn_update_link_status(struct hn_softc *sc) 2645{ 2646 2647 if (sc->hn_mgmt_taskq != NULL) 2648 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task); 2649} 2650 2651static void 2652hn_change_network(struct hn_softc *sc) 2653{ 2654 2655 if (sc->hn_mgmt_taskq != NULL) 2656 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init); 2657} 2658 2659static __inline int 2660hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, 2661 struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) 2662{ 2663 struct mbuf *m = *m_head; 2664 int error; 2665 2666 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim")); 2667 2668 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, 2669 m, segs, nsegs, BUS_DMA_NOWAIT); 2670 if (error == EFBIG) { 2671 struct mbuf *m_new; 2672 2673 m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); 2674 if (m_new == NULL) 2675 return ENOBUFS; 2676 else 2677 *m_head = m = m_new; 2678 txr->hn_tx_collapsed++; 2679 2680 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, 2681 txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); 2682 } 2683 if (!error) { 2684 bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, 2685 BUS_DMASYNC_PREWRITE); 2686 txd->flags |= HN_TXD_FLAG_DMAMAP; 2687 } 2688 return error; 2689} 2690 2691static __inline int 2692hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) 2693{ 2694 2695 KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, 2696 ("put an onlist txd %#x", txd->flags)); 2697 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2698 ("put an onagg txd %#x", txd->flags)); 2699 2700 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2701 if (atomic_fetchadd_int(&txd->refs, -1) != 1) 2702 return 0; 2703 2704 if (!STAILQ_EMPTY(&txd->agg_list)) { 2705 struct hn_txdesc *tmp_txd; 2706 2707 while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) { 2708 int freed; 2709 2710 KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list), 2711 ("resursive aggregation on aggregated txdesc")); 2712 KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG), 2713 ("not aggregated txdesc")); 2714 KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2715 ("aggregated txdesc uses dmamap")); 2716 KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 2717 ("aggregated txdesc consumes " 2718 "chimney sending buffer")); 2719 KASSERT(tmp_txd->chim_size == 0, 2720 ("aggregated txdesc has non-zero " 2721 "chimney sending size")); 2722 2723 STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link); 2724 tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG; 2725 freed = hn_txdesc_put(txr, tmp_txd); 2726 KASSERT(freed, ("failed to free aggregated txdesc")); 2727 } 2728 } 2729 2730 if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) { 2731 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2732 ("chim txd uses dmamap")); 2733 hn_chim_free(txr->hn_sc, txd->chim_index); 2734 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 2735 txd->chim_size = 0; 2736 } else if (txd->flags & HN_TXD_FLAG_DMAMAP) { 2737 bus_dmamap_sync(txr->hn_tx_data_dtag, 2738 txd->data_dmap, BUS_DMASYNC_POSTWRITE); 2739 bus_dmamap_unload(txr->hn_tx_data_dtag, 2740 txd->data_dmap); 2741 txd->flags &= ~HN_TXD_FLAG_DMAMAP; 2742 } 2743 2744 if (txd->m != NULL) { 2745 m_freem(txd->m); 2746 txd->m = NULL; 2747 } 2748 2749 txd->flags |= HN_TXD_FLAG_ONLIST; 2750#ifndef HN_USE_TXDESC_BUFRING 2751 mtx_lock_spin(&txr->hn_txlist_spin); 2752 KASSERT(txr->hn_txdesc_avail >= 0 && 2753 txr->hn_txdesc_avail < txr->hn_txdesc_cnt, 2754 
("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); 2755 txr->hn_txdesc_avail++; 2756 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 2757 mtx_unlock_spin(&txr->hn_txlist_spin); 2758#else /* HN_USE_TXDESC_BUFRING */ 2759#ifdef HN_DEBUG 2760 atomic_add_int(&txr->hn_txdesc_avail, 1); 2761#endif 2762 buf_ring_enqueue(txr->hn_txdesc_br, txd); 2763#endif /* !HN_USE_TXDESC_BUFRING */ 2764 2765 return 1; 2766} 2767 2768static __inline struct hn_txdesc * 2769hn_txdesc_get(struct hn_tx_ring *txr) 2770{ 2771 struct hn_txdesc *txd; 2772 2773#ifndef HN_USE_TXDESC_BUFRING 2774 mtx_lock_spin(&txr->hn_txlist_spin); 2775 txd = SLIST_FIRST(&txr->hn_txlist); 2776 if (txd != NULL) { 2777 KASSERT(txr->hn_txdesc_avail > 0, 2778 ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); 2779 txr->hn_txdesc_avail--; 2780 SLIST_REMOVE_HEAD(&txr->hn_txlist, link); 2781 } 2782 mtx_unlock_spin(&txr->hn_txlist_spin); 2783#else 2784 txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); 2785#endif 2786 2787 if (txd != NULL) { 2788#ifdef HN_USE_TXDESC_BUFRING 2789#ifdef HN_DEBUG 2790 atomic_subtract_int(&txr->hn_txdesc_avail, 1); 2791#endif 2792#endif /* HN_USE_TXDESC_BUFRING */ 2793 KASSERT(txd->m == NULL && txd->refs == 0 && 2794 STAILQ_EMPTY(&txd->agg_list) && 2795 txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 2796 txd->chim_size == 0 && 2797 (txd->flags & HN_TXD_FLAG_ONLIST) && 2798 (txd->flags & HN_TXD_FLAG_ONAGG) == 0 && 2799 (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd")); 2800 txd->flags &= ~HN_TXD_FLAG_ONLIST; 2801 txd->refs = 1; 2802 } 2803 return txd; 2804} 2805 2806static __inline void 2807hn_txdesc_hold(struct hn_txdesc *txd) 2808{ 2809 2810 /* 0->1 transition will never work */ 2811 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2812 atomic_add_int(&txd->refs, 1); 2813} 2814 2815static __inline void 2816hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd) 2817{ 2818 2819 KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2820 ("recursive aggregation on aggregating txdesc")); 2821 2822 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2823 ("already aggregated")); 2824 KASSERT(STAILQ_EMPTY(&txd->agg_list), 2825 ("recursive aggregation on to-be-aggregated txdesc")); 2826 2827 txd->flags |= HN_TXD_FLAG_ONAGG; 2828 STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link); 2829} 2830 2831static bool 2832hn_tx_ring_pending(struct hn_tx_ring *txr) 2833{ 2834 bool pending = false; 2835 2836#ifndef HN_USE_TXDESC_BUFRING 2837 mtx_lock_spin(&txr->hn_txlist_spin); 2838 if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt) 2839 pending = true; 2840 mtx_unlock_spin(&txr->hn_txlist_spin); 2841#else 2842 if (!buf_ring_full(txr->hn_txdesc_br)) 2843 pending = true; 2844#endif 2845 return (pending); 2846} 2847 2848static __inline void 2849hn_txeof(struct hn_tx_ring *txr) 2850{ 2851 txr->hn_has_txeof = 0; 2852 txr->hn_txeof(txr); 2853} 2854 2855static void 2856hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc, 2857 struct vmbus_channel *chan, const void *data __unused, int dlen __unused) 2858{ 2859 struct hn_txdesc *txd = sndc->hn_cbarg; 2860 struct hn_tx_ring *txr; 2861 2862 txr = txd->txr; 2863 KASSERT(txr->hn_chan == chan, 2864 ("channel mismatch, on chan%u, should be chan%u", 2865 vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan))); 2866 2867 txr->hn_has_txeof = 1; 2868 hn_txdesc_put(txr, txd); 2869 2870 ++txr->hn_txdone_cnt; 2871 if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { 2872 txr->hn_txdone_cnt = 0; 2873 if (txr->hn_oactive) 2874 hn_txeof(txr); 2875 } 2876} 2877 2878static void 
2879hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) 2880{ 2881#if defined(INET) || defined(INET6) 2882 tcp_lro_flush_all(&rxr->hn_lro); 2883#endif 2884 2885 /* 2886 * NOTE: 2887 * 'txr' could be NULL, if multiple channels and 2888 * ifnet.if_start method are enabled. 2889 */ 2890 if (txr == NULL || !txr->hn_has_txeof) 2891 return; 2892 2893 txr->hn_txdone_cnt = 0; 2894 hn_txeof(txr); 2895} 2896 2897static __inline uint32_t 2898hn_rndis_pktmsg_offset(uint32_t ofs) 2899{ 2900 2901 KASSERT(ofs >= sizeof(struct rndis_packet_msg), 2902 ("invalid RNDIS packet msg offset %u", ofs)); 2903 return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); 2904} 2905 2906static __inline void * 2907hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, 2908 size_t pi_dlen, uint32_t pi_type) 2909{ 2910 const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); 2911 struct rndis_pktinfo *pi; 2912 2913 KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, 2914 ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); 2915 2916 /* 2917 * Per-packet-info does not move; it only grows. 2918 * 2919 * NOTE: 2920 * rm_pktinfooffset in this phase counts from the beginning 2921 * of rndis_packet_msg. 2922 */ 2923 KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, 2924 ("%u pktinfo overflows RNDIS packet msg", pi_type)); 2925 pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + 2926 pkt->rm_pktinfolen); 2927 pkt->rm_pktinfolen += pi_size; 2928 2929 pi->rm_size = pi_size; 2930 pi->rm_type = pi_type; 2931 pi->rm_internal = 0; 2932 pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; 2933 2934 return (pi->rm_data); 2935} 2936 2937static __inline int 2938hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr) 2939{ 2940 struct hn_txdesc *txd; 2941 struct mbuf *m; 2942 int error, pkts; 2943 2944 txd = txr->hn_agg_txd; 2945 KASSERT(txd != NULL, ("no aggregate txdesc")); 2946 2947 /* 2948 * Since hn_txpkt() will reset this temporary stat, save 2949 * it now, so that oerrors can be updated properly, if 2950 * hn_txpkt() ever fails. 2951 */ 2952 pkts = txr->hn_stat_pkts; 2953 2954 /* 2955 * Since txd's mbuf will _not_ be freed upon hn_txpkt() 2956 * failure, save it for later freeing, if hn_txpkt() ever 2957 * fails. 2958 */ 2959 m = txd->m; 2960 error = hn_txpkt(ifp, txr, txd); 2961 if (__predict_false(error)) { 2962 /* txd is freed, but m is not. */ 2963 m_freem(m); 2964 2965 txr->hn_flush_failed++; 2966 if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts); 2967 } 2968 2969 /* Reset all aggregation states. */ 2970 txr->hn_agg_txd = NULL; 2971 txr->hn_agg_szleft = 0; 2972 txr->hn_agg_pktleft = 0; 2973 txr->hn_agg_prevpkt = NULL; 2974 2975 return (error); 2976} 2977 2978static void * 2979hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 2980 int pktsize) 2981{ 2982 void *chim; 2983 2984 if (txr->hn_agg_txd != NULL) { 2985 if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) { 2986 struct hn_txdesc *agg_txd = txr->hn_agg_txd; 2987 struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt; 2988 int olen; 2989 2990 /* 2991 * Update the previous RNDIS packet's total length, 2992 * it can be increased due to the mandatory alignment 2993 * padding for this RNDIS packet. And update the 2994 * aggregating txdesc's chimney sending buffer size 2995 * accordingly. 2996 * 2997 * XXX 2998 * Zero-out the padding, as required by the RNDIS spec. 
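			 *
			 * Chimney buffer layout after this fixup, roughly:
			 *
			 *	+-----------+- pad -+-----------+- pad -+--
			 *	| RNDIS pkt |       | RNDIS pkt |       |..
			 *	+-----------+-------+-----------+-------+--
			 *
			 * rm_len is rounded up to hn_agg_align per packet
			 * and chim_size grows by the same padding.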
2999 */ 3000 olen = pkt->rm_len; 3001 pkt->rm_len = roundup2(olen, txr->hn_agg_align); 3002 agg_txd->chim_size += pkt->rm_len - olen; 3003 3004 /* Link this txdesc to the parent. */ 3005 hn_txdesc_agg(agg_txd, txd); 3006 3007 chim = (uint8_t *)pkt + pkt->rm_len; 3008 /* Save the current packet for later fixup. */ 3009 txr->hn_agg_prevpkt = chim; 3010 3011 txr->hn_agg_pktleft--; 3012 txr->hn_agg_szleft -= pktsize; 3013 if (txr->hn_agg_szleft <= 3014 HN_PKTSIZE_MIN(txr->hn_agg_align)) { 3015 /* 3016 * Probably can't aggregate more packets, 3017 * flush this aggregating txdesc proactively. 3018 */ 3019 txr->hn_agg_pktleft = 0; 3020 } 3021 /* Done! */ 3022 return (chim); 3023 } 3024 hn_flush_txagg(ifp, txr); 3025 } 3026 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 3027 3028 txr->hn_tx_chimney_tried++; 3029 txd->chim_index = hn_chim_alloc(txr->hn_sc); 3030 if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID) 3031 return (NULL); 3032 txr->hn_tx_chimney++; 3033 3034 chim = txr->hn_sc->hn_chim + 3035 (txd->chim_index * txr->hn_sc->hn_chim_szmax); 3036 3037 if (txr->hn_agg_pktmax > 1 && 3038 txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) { 3039 txr->hn_agg_txd = txd; 3040 txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1; 3041 txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize; 3042 txr->hn_agg_prevpkt = chim; 3043 } 3044 return (chim); 3045} 3046 3047/* 3048 * NOTE: 3049 * If this function fails, then both txd and m_head0 will be freed. 3050 */ 3051static int 3052hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 3053 struct mbuf **m_head0) 3054{ 3055 bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; 3056 int error, nsegs, i; 3057 struct mbuf *m_head = *m_head0; 3058 struct rndis_packet_msg *pkt; 3059 uint32_t *pi_data; 3060 void *chim = NULL; 3061 int pkt_hlen, pkt_size; 3062 3063 pkt = txd->rndis_pkt; 3064 pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align); 3065 if (pkt_size < txr->hn_chim_size) { 3066 chim = hn_try_txagg(ifp, txr, txd, pkt_size); 3067 if (chim != NULL) 3068 pkt = chim; 3069 } else { 3070 if (txr->hn_agg_txd != NULL) 3071 hn_flush_txagg(ifp, txr); 3072 } 3073 3074 pkt->rm_type = REMOTE_NDIS_PACKET_MSG; 3075 pkt->rm_len = m_head->m_pkthdr.len; 3076 pkt->rm_dataoffset = 0; 3077 pkt->rm_datalen = m_head->m_pkthdr.len; 3078 pkt->rm_oobdataoffset = 0; 3079 pkt->rm_oobdatalen = 0; 3080 pkt->rm_oobdataelements = 0; 3081 pkt->rm_pktinfooffset = sizeof(*pkt); 3082 pkt->rm_pktinfolen = 0; 3083 pkt->rm_vchandle = 0; 3084 pkt->rm_reserved = 0; 3085 3086 if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) { 3087 /* 3088 * Set the hash value for this packet. 3089 */ 3090 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3091 HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); 3092 3093 if (M_HASHTYPE_ISHASH(m_head)) 3094 /* 3095 * The flowid field contains the hash value host 3096 * set in the rx queue if it is a ip forwarding pkt. 3097 * Set the same hash value so host can send on the 3098 * cpu it was received. 3099 */ 3100 *pi_data = m_head->m_pkthdr.flowid; 3101 else 3102 /* 3103 * Otherwise just put the tx queue index. 
3104 */ 3105 *pi_data = txr->hn_tx_idx; 3106 } 3107 3108 if (m_head->m_flags & M_VLANTAG) { 3109 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3110 NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); 3111 *pi_data = NDIS_VLAN_INFO_MAKE( 3112 EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), 3113 EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), 3114 EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); 3115 } 3116 3117 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3118#if defined(INET6) || defined(INET) 3119 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3120 NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); 3121#ifdef INET 3122 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 3123 *pi_data = NDIS_LSO2_INFO_MAKEIPV4( 3124 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, 3125 m_head->m_pkthdr.tso_segsz); 3126 } 3127#endif 3128#if defined(INET6) && defined(INET) 3129 else 3130#endif 3131#ifdef INET6 3132 { 3133 *pi_data = NDIS_LSO2_INFO_MAKEIPV6( 3134 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, 3135 m_head->m_pkthdr.tso_segsz); 3136 } 3137#endif 3138#endif /* INET6 || INET */ 3139 } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { 3140 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3141 NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); 3142 if (m_head->m_pkthdr.csum_flags & 3143 (CSUM_IP6_TCP | CSUM_IP6_UDP)) { 3144 *pi_data = NDIS_TXCSUM_INFO_IPV6; 3145 } else { 3146 *pi_data = NDIS_TXCSUM_INFO_IPV4; 3147 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 3148 *pi_data |= NDIS_TXCSUM_INFO_IPCS; 3149 } 3150 3151 if (m_head->m_pkthdr.csum_flags & 3152 (CSUM_IP_TCP | CSUM_IP6_TCP)) { 3153 *pi_data |= NDIS_TXCSUM_INFO_MKTCPCS( 3154 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); 3155 } else if (m_head->m_pkthdr.csum_flags & 3156 (CSUM_IP_UDP | CSUM_IP6_UDP)) { 3157 *pi_data |= NDIS_TXCSUM_INFO_MKUDPCS( 3158 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); 3159 } 3160 } 3161 3162 pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; 3163 /* Fixup RNDIS packet message total length */ 3164 pkt->rm_len += pkt_hlen; 3165 /* Convert RNDIS packet message offsets */ 3166 pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt_hlen); 3167 pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); 3168 3169 /* 3170 * Fast path: Chimney sending. 3171 */ 3172 if (chim != NULL) { 3173 struct hn_txdesc *tgt_txd = txd; 3174 3175 if (txr->hn_agg_txd != NULL) { 3176 tgt_txd = txr->hn_agg_txd; 3177#ifdef INVARIANTS 3178 *m_head0 = NULL; 3179#endif 3180 } 3181 3182 KASSERT(pkt == chim, 3183 ("RNDIS pkt not in chimney sending buffer")); 3184 KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID, 3185 ("chimney sending buffer is not used")); 3186 tgt_txd->chim_size += pkt->rm_len; 3187 3188 m_copydata(m_head, 0, m_head->m_pkthdr.len, 3189 ((uint8_t *)chim) + pkt_hlen); 3190 3191 txr->hn_gpa_cnt = 0; 3192 txr->hn_sendpkt = hn_txpkt_chim; 3193 goto done; 3194 } 3195 3196 KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc")); 3197 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 3198 ("chimney buffer is used")); 3199 KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc")); 3200 3201 error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); 3202 if (__predict_false(error)) { 3203 int freed; 3204 3205 /* 3206 * This mbuf is not linked w/ the txd yet, so free it now. 
3207 */ 3208 m_freem(m_head); 3209 *m_head0 = NULL; 3210 3211 freed = hn_txdesc_put(txr, txd); 3212 KASSERT(freed != 0, 3213 ("fail to free txd upon txdma error")); 3214 3215 txr->hn_txdma_failed++; 3216 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 3217 return error; 3218 } 3219 *m_head0 = m_head; 3220 3221 /* +1 RNDIS packet message */ 3222 txr->hn_gpa_cnt = nsegs + 1; 3223 3224 /* send packet with page buffer */ 3225 txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); 3226 txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; 3227 txr->hn_gpa[0].gpa_len = pkt_hlen; 3228 3229 /* 3230 * Fill the page buffers with mbuf info after the page 3231 * buffer for RNDIS packet message. 3232 */ 3233 for (i = 0; i < nsegs; ++i) { 3234 struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; 3235 3236 gpa->gpa_page = atop(segs[i].ds_addr); 3237 gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; 3238 gpa->gpa_len = segs[i].ds_len; 3239 } 3240 3241 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 3242 txd->chim_size = 0; 3243 txr->hn_sendpkt = hn_txpkt_sglist; 3244done: 3245 txd->m = m_head; 3246 3247 /* Set the completion routine */ 3248 hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); 3249 3250 /* Update temporary stats for later use. */ 3251 txr->hn_stat_pkts++; 3252 txr->hn_stat_size += m_head->m_pkthdr.len; 3253 if (m_head->m_flags & M_MCAST) 3254 txr->hn_stat_mcasts++; 3255 3256 return 0; 3257} 3258 3259/* 3260 * NOTE: 3261 * If this function fails, then txd will be freed, but the mbuf 3262 * associated w/ the txd will _not_ be freed. 3263 */ 3264static int 3265hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) 3266{ 3267 int error, send_failed = 0, has_bpf; 3268 3269again: 3270 has_bpf = bpf_peers_present(ifp->if_bpf); 3271 if (has_bpf) { 3272 /* 3273 * Make sure that this txd and any aggregated txds are not 3274 * freed before ETHER_BPF_MTAP. 3275 */ 3276 hn_txdesc_hold(txd); 3277 } 3278 error = txr->hn_sendpkt(txr, txd); 3279 if (!error) { 3280 if (has_bpf) { 3281 const struct hn_txdesc *tmp_txd; 3282 3283 ETHER_BPF_MTAP(ifp, txd->m); 3284 STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link) 3285 ETHER_BPF_MTAP(ifp, tmp_txd->m); 3286 } 3287 3288 if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts); 3289#ifdef HN_IFSTART_SUPPORT 3290 if (!hn_use_if_start) 3291#endif 3292 { 3293 if_inc_counter(ifp, IFCOUNTER_OBYTES, 3294 txr->hn_stat_size); 3295 if (txr->hn_stat_mcasts != 0) { 3296 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 3297 txr->hn_stat_mcasts); 3298 } 3299 } 3300 txr->hn_pkts += txr->hn_stat_pkts; 3301 txr->hn_sends++; 3302 } 3303 if (has_bpf) 3304 hn_txdesc_put(txr, txd); 3305 3306 if (__predict_false(error)) { 3307 int freed; 3308 3309 /* 3310 * This should "really rarely" happen. 3311 * 3312 * XXX Too many RX to be acked or too many sideband 3313 * commands to run? Ask netvsc_channel_rollup() 3314 * to kick start later. 3315 */ 3316 txr->hn_has_txeof = 1; 3317 if (!send_failed) { 3318 txr->hn_send_failed++; 3319 send_failed = 1; 3320 /* 3321 * Try sending again after set hn_has_txeof; 3322 * in case that we missed the last 3323 * netvsc_channel_rollup(). 3324 */ 3325 goto again; 3326 } 3327 if_printf(ifp, "send failed\n"); 3328 3329 /* 3330 * Caller will perform further processing on the 3331 * associated mbuf, so don't free it in hn_txdesc_put(); 3332 * only unload it from the DMA map in hn_txdesc_put(), 3333 * if it was loaded. 
3334 */ 3335 txd->m = NULL; 3336 freed = hn_txdesc_put(txr, txd); 3337 KASSERT(freed != 0, 3338 ("fail to free txd upon send error")); 3339 3340 txr->hn_send_failed++; 3341 } 3342 3343 /* Reset temporary stats, after this sending is done. */ 3344 txr->hn_stat_size = 0; 3345 txr->hn_stat_pkts = 0; 3346 txr->hn_stat_mcasts = 0; 3347 3348 return (error); 3349} 3350 3351/* 3352 * Append the specified data to the indicated mbuf chain, 3353 * Extend the mbuf chain if the new data does not fit in 3354 * existing space. 3355 * 3356 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. 3357 * There should be an equivalent in the kernel mbuf code, 3358 * but there does not appear to be one yet. 3359 * 3360 * Differs from m_append() in that additional mbufs are 3361 * allocated with cluster size MJUMPAGESIZE, and filled 3362 * accordingly. 3363 * 3364 * Return the last mbuf in the chain or NULL if failed to 3365 * allocate new mbuf. 3366 */ 3367static struct mbuf * 3368hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) 3369{ 3370 struct mbuf *m, *n; 3371 int remainder, space; 3372 3373 for (m = m0; m->m_next != NULL; m = m->m_next) 3374 ; 3375 remainder = len; 3376 space = M_TRAILINGSPACE(m); 3377 if (space > 0) { 3378 /* 3379 * Copy into available space. 3380 */ 3381 if (space > remainder) 3382 space = remainder; 3383 bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 3384 m->m_len += space; 3385 cp += space; 3386 remainder -= space; 3387 } 3388 while (remainder > 0) { 3389 /* 3390 * Allocate a new mbuf; could check space 3391 * and allocate a cluster instead. 3392 */ 3393 n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); 3394 if (n == NULL) 3395 return NULL; 3396 n->m_len = min(MJUMPAGESIZE, remainder); 3397 bcopy(cp, mtod(n, caddr_t), n->m_len); 3398 cp += n->m_len; 3399 remainder -= n->m_len; 3400 m->m_next = n; 3401 m = n; 3402 } 3403 3404 return m; 3405} 3406 3407#if defined(INET) || defined(INET6) 3408static __inline int 3409hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) 3410{ 3411#if __FreeBSD_version >= 1100095 3412 if (hn_lro_mbufq_depth) { 3413 tcp_lro_queue_mbuf(lc, m); 3414 return 0; 3415 } 3416#endif 3417 return tcp_lro_rx(lc, m, 0); 3418} 3419#endif 3420 3421static int 3422hn_rxpkt(struct hn_rx_ring *rxr) 3423{ 3424 struct ifnet *ifp, *hn_ifp = rxr->hn_ifp; 3425 struct mbuf *m_new, *n; 3426 int size, do_lro = 0, do_csum = 1, is_vf = 0; 3427 int hash_type = M_HASHTYPE_NONE; 3428 int l3proto = ETHERTYPE_MAX, l4proto = IPPROTO_DONE; 3429 int i; 3430 3431 ifp = hn_ifp; 3432 if (rxr->hn_rxvf_ifp != NULL) { 3433 /* 3434 * Non-transparent mode VF; pretend this packet is from 3435 * the VF. 3436 */ 3437 ifp = rxr->hn_rxvf_ifp; 3438 is_vf = 1; 3439 } else if (rxr->hn_rx_flags & HN_RX_FLAG_XPNT_VF) { 3440 /* Transparent mode VF. */ 3441 is_vf = 1; 3442 } 3443 3444 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 3445 /* 3446 * NOTE: 3447 * See the NOTE of hn_rndis_init_fixat(). This 3448 * function can be reached, immediately after the 3449 * RNDIS is initialized but before the ifnet is 3450 * setup on the hn_attach() path; drop the unexpected 3451 * packets. 
3452 */ 3453 return (0); 3454 } 3455 3456 if (__predict_false(rxr->rsc.pktlen < ETHER_HDR_LEN)) { 3457 if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1); 3458 return (0); 3459 } 3460 3461 if (rxr->rsc.cnt == 1 && rxr->rsc.pktlen <= MHLEN) { 3462 m_new = m_gethdr(M_NOWAIT, MT_DATA); 3463 if (m_new == NULL) { 3464 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3465 return (0); 3466 } 3467 memcpy(mtod(m_new, void *), rxr->rsc.frag_data[0], 3468 rxr->rsc.frag_len[0]); 3469 m_new->m_pkthdr.len = m_new->m_len = rxr->rsc.frag_len[0]; 3470 } else { 3471 /* 3472 * Get an mbuf with a cluster. For packets 2K or less, 3473 * get a standard 2K cluster. For anything larger, get a 3474 * 4K cluster. Any buffers larger than 4K can cause problems 3475 * if looped around to the Hyper-V TX channel, so avoid them. 3476 */ 3477 size = MCLBYTES; 3478 if (rxr->rsc.pktlen > MCLBYTES) { 3479 /* 4096 */ 3480 size = MJUMPAGESIZE; 3481 } 3482 3483 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); 3484 if (m_new == NULL) { 3485 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3486 return (0); 3487 } 3488 3489 n = m_new; 3490 for (i = 0; i < rxr->rsc.cnt; i++) { 3491 n = hv_m_append(n, rxr->rsc.frag_len[i], 3492 rxr->rsc.frag_data[i]); 3493 if (n == NULL) { 3494 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3495 return (0); 3496 } else { 3497 m_new->m_pkthdr.len += rxr->rsc.frag_len[i]; 3498 } 3499 } 3500 } 3501 if (rxr->rsc.pktlen <= MHLEN) 3502 rxr->hn_small_pkts++; 3503 3504 m_new->m_pkthdr.rcvif = ifp; 3505 3506 if (__predict_false((hn_ifp->if_capenable & IFCAP_RXCSUM) == 0)) 3507 do_csum = 0; 3508 3509 /* receive side checksum offload */ 3510 if (rxr->rsc.csum_info != NULL) { 3511 /* IP csum offload */ 3512 if ((*(rxr->rsc.csum_info) & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { 3513 m_new->m_pkthdr.csum_flags |= 3514 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3515 rxr->hn_csum_ip++; 3516 } 3517 3518 /* TCP/UDP csum offload */ 3519 if ((*(rxr->rsc.csum_info) & (NDIS_RXCSUM_INFO_UDPCS_OK | 3520 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { 3521 m_new->m_pkthdr.csum_flags |= 3522 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3523 m_new->m_pkthdr.csum_data = 0xffff; 3524 if (*(rxr->rsc.csum_info) & NDIS_RXCSUM_INFO_TCPCS_OK) 3525 rxr->hn_csum_tcp++; 3526 else 3527 rxr->hn_csum_udp++; 3528 } 3529 3530 /* 3531 * XXX 3532 * As of this write (Oct 28th, 2016), host side will turn 3533 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so 3534 * the do_lro setting here is actually _not_ accurate. We 3535 * depend on the RSS hash type check to reset do_lro. 
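		 *
		 * Concretely, in the hash-type switch further below only
		 * the TCP hash types keep do_lro set; every non-TCP type
		 * clears it again, which keeps the inaccuracy here
		 * harmless.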
		 */
		if ((*(rxr->rsc.csum_info) &
		    (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) ==
		    (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK))
			do_lro = 1;
	} else {
		hn_rxpkt_proto(m_new, &l3proto, &l4proto);
		if (l3proto == ETHERTYPE_IP) {
			if (l4proto == IPPROTO_TCP) {
				if (do_csum &&
				    (rxr->hn_trust_hcsum &
				     HN_TRUST_HCSUM_TCP)) {
					rxr->hn_csum_trusted++;
					m_new->m_pkthdr.csum_flags |=
					   (CSUM_IP_CHECKED | CSUM_IP_VALID |
					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
					m_new->m_pkthdr.csum_data = 0xffff;
				}
				do_lro = 1;
			} else if (l4proto == IPPROTO_UDP) {
				if (do_csum &&
				    (rxr->hn_trust_hcsum &
				     HN_TRUST_HCSUM_UDP)) {
					rxr->hn_csum_trusted++;
					m_new->m_pkthdr.csum_flags |=
					   (CSUM_IP_CHECKED | CSUM_IP_VALID |
					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
					m_new->m_pkthdr.csum_data = 0xffff;
				}
			} else if (l4proto != IPPROTO_DONE && do_csum &&
			    (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) {
				rxr->hn_csum_trusted++;
				m_new->m_pkthdr.csum_flags |=
				    (CSUM_IP_CHECKED | CSUM_IP_VALID);
			}
		}
	}

	if (rxr->rsc.vlan_info != NULL) {
		m_new->m_pkthdr.ether_vtag = EVL_MAKETAG(
		    NDIS_VLAN_INFO_ID(*(rxr->rsc.vlan_info)),
		    NDIS_VLAN_INFO_PRI(*(rxr->rsc.vlan_info)),
		    NDIS_VLAN_INFO_CFI(*(rxr->rsc.vlan_info)));
		m_new->m_flags |= M_VLANTAG;
	}

	/*
	 * If VF is activated (transparent/non-transparent mode does not
	 * matter here).
	 *
	 * - Disable LRO
	 *
	 * hn(4) will only receive broadcast packets, multicast packets,
	 * TCP SYN and SYN|ACK (in Azure); LRO is useless for these
	 * packet types.
	 *
	 * For non-transparent, we definitely _cannot_ enable LRO at
	 * all, since the LRO flush will use hn(4) as the receiving
	 * interface; i.e. hn_ifp->if_input(hn_ifp, m).
	 */
	if (is_vf)
		do_lro = 0;

	/*
	 * If VF is activated (transparent/non-transparent mode does not
	 * matter here), do _not_ mess with unsupported hash types or
	 * functions.
	 */
	if (rxr->rsc.hash_info != NULL) {
		rxr->hn_rss_pkts++;
		m_new->m_pkthdr.flowid = *(rxr->rsc.hash_value);
		if (!is_vf)
			hash_type = M_HASHTYPE_OPAQUE_HASH;
		if ((*(rxr->rsc.hash_info) & NDIS_HASH_FUNCTION_MASK) ==
		    NDIS_HASH_FUNCTION_TOEPLITZ) {
			uint32_t type = (*(rxr->rsc.hash_info) &
			    NDIS_HASH_TYPE_MASK & rxr->hn_mbuf_hash);

			/*
			 * NOTE:
			 * do_lro is reset, if the hash types are not TCP
			 * related.  See the comment in the above csum_flags
			 * setup section.
			 */
			switch (type) {
			case NDIS_HASH_IPV4:
				hash_type = M_HASHTYPE_RSS_IPV4;
				do_lro = 0;
				break;

			case NDIS_HASH_TCP_IPV4:
				hash_type = M_HASHTYPE_RSS_TCP_IPV4;
				if (rxr->hn_rx_flags & HN_RX_FLAG_UDP_HASH) {
					int def_htype = M_HASHTYPE_OPAQUE_HASH;

					if (is_vf)
						def_htype = M_HASHTYPE_NONE;

					/*
					 * UDP 4-tuple hash is delivered as
					 * TCP 4-tuple hash.
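					 * The checks below therefore
					 * re-parse the headers and pick:
					 *
					 *  UDP + UDP_X hash
					 *	-> RSS_UDP_IPV4, no LRO
					 *  non-TCP
					 *	-> def_htype, no LRO
					 *  TCP
					 *	-> keep RSS_TCP_IPV4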
3637 */ 3638 if (l3proto == ETHERTYPE_MAX) { 3639 hn_rxpkt_proto(m_new, 3640 &l3proto, &l4proto); 3641 } 3642 if (l3proto == ETHERTYPE_IP) { 3643 if (l4proto == IPPROTO_UDP && 3644 (rxr->hn_mbuf_hash & 3645 NDIS_HASH_UDP_IPV4_X)) { 3646 hash_type = 3647 M_HASHTYPE_RSS_UDP_IPV4; 3648 do_lro = 0; 3649 } else if (l4proto != 3650 IPPROTO_TCP) { 3651 hash_type = def_htype; 3652 do_lro = 0; 3653 } 3654 } else { 3655 hash_type = def_htype; 3656 do_lro = 0; 3657 } 3658 } 3659 break; 3660 3661 case NDIS_HASH_IPV6: 3662 hash_type = M_HASHTYPE_RSS_IPV6; 3663 do_lro = 0; 3664 break; 3665 3666 case NDIS_HASH_IPV6_EX: 3667 hash_type = M_HASHTYPE_RSS_IPV6_EX; 3668 do_lro = 0; 3669 break; 3670 3671 case NDIS_HASH_TCP_IPV6: 3672 hash_type = M_HASHTYPE_RSS_TCP_IPV6; 3673 break; 3674 3675 case NDIS_HASH_TCP_IPV6_EX: 3676 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; 3677 break; 3678 } 3679 } 3680 } else if (!is_vf) { 3681 m_new->m_pkthdr.flowid = rxr->hn_rx_idx; 3682 hash_type = M_HASHTYPE_OPAQUE; 3683 } 3684 M_HASHTYPE_SET(m_new, hash_type); 3685 3686 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 3687 if (hn_ifp != ifp) { 3688 const struct ether_header *eh; 3689 3690 /* 3691 * Non-transparent mode VF is activated. 3692 */ 3693 3694 /* 3695 * Allow tapping on hn(4). 3696 */ 3697 ETHER_BPF_MTAP(hn_ifp, m_new); 3698 3699 /* 3700 * Update hn(4)'s stats. 3701 */ 3702 if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); 3703 if_inc_counter(hn_ifp, IFCOUNTER_IBYTES, m_new->m_pkthdr.len); 3704 /* Checked at the beginning of this function. */ 3705 KASSERT(m_new->m_len >= ETHER_HDR_LEN, ("not ethernet frame")); 3706 eh = mtod(m_new, struct ether_header *); 3707 if (ETHER_IS_MULTICAST(eh->ether_dhost)) 3708 if_inc_counter(hn_ifp, IFCOUNTER_IMCASTS, 1); 3709 } 3710 rxr->hn_pkts++; 3711 3712 if ((hn_ifp->if_capenable & IFCAP_LRO) && do_lro) { 3713#if defined(INET) || defined(INET6) 3714 struct lro_ctrl *lro = &rxr->hn_lro; 3715 3716 if (lro->lro_cnt) { 3717 rxr->hn_lro_tried++; 3718 if (hn_lro_rx(lro, m_new) == 0) { 3719 /* DONE! */ 3720 return 0; 3721 } 3722 } 3723#endif 3724 } 3725 ifp->if_input(ifp, m_new); 3726 3727 return (0); 3728} 3729 3730static int 3731hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 3732{ 3733 struct hn_softc *sc = ifp->if_softc; 3734 struct ifreq *ifr = (struct ifreq *)data, ifr_vf; 3735 struct ifnet *vf_ifp; 3736 int mask, error = 0; 3737 struct ifrsskey *ifrk; 3738 struct ifrsshash *ifrh; 3739 uint32_t mtu; 3740 3741 switch (cmd) { 3742 case SIOCSIFMTU: 3743 if (ifr->ifr_mtu > HN_MTU_MAX) { 3744 error = EINVAL; 3745 break; 3746 } 3747 3748 HN_LOCK(sc); 3749 3750 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3751 HN_UNLOCK(sc); 3752 break; 3753 } 3754 3755 if ((sc->hn_caps & HN_CAP_MTU) == 0) { 3756 /* Can't change MTU */ 3757 HN_UNLOCK(sc); 3758 error = EOPNOTSUPP; 3759 break; 3760 } 3761 3762 if (ifp->if_mtu == ifr->ifr_mtu) { 3763 HN_UNLOCK(sc); 3764 break; 3765 } 3766 3767 if (hn_xpnt_vf_isready(sc)) { 3768 vf_ifp = sc->hn_vf_ifp; 3769 ifr_vf = *ifr; 3770 strlcpy(ifr_vf.ifr_name, vf_ifp->if_xname, 3771 sizeof(ifr_vf.ifr_name)); 3772 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, 3773 (caddr_t)&ifr_vf); 3774 if (error) { 3775 HN_UNLOCK(sc); 3776 if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n", 3777 vf_ifp->if_xname, ifr->ifr_mtu, error); 3778 break; 3779 } 3780 } 3781 3782 /* 3783 * Suspend this interface before the synthetic parts 3784 * are ripped. 3785 */ 3786 hn_suspend(sc); 3787 3788 /* 3789 * Detach the synthetics parts, i.e. NVS and RNDIS. 
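		 *
		 * Overall MTU-change sequence, for reference:
		 *
		 *	hn_suspend()		quiesce data/management
		 *	hn_synth_detach()	tear down NVS/RNDIS
		 *	hn_synth_attach(mtu)	re-attach with the new MTU
		 *	hn_rndis_get_mtu()	clamp to what the host took
		 *	hn_mtu_change_fixup()	revalidate MTU-derived bits
		 *	hn_resume()		resume data/management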
		 */
		hn_synth_detach(sc);

		/*
		 * Reattach the synthetic parts, i.e. NVS and RNDIS,
		 * with the new MTU setting.
		 */
		error = hn_synth_attach(sc, ifr->ifr_mtu);
		if (error) {
			HN_UNLOCK(sc);
			break;
		}

		error = hn_rndis_get_mtu(sc, &mtu);
		if (error)
			mtu = ifr->ifr_mtu;
		else if (bootverbose)
			if_printf(ifp, "RNDIS mtu %u\n", mtu);

		/*
		 * Commit the requested MTU, after the synthetic parts
		 * have been successfully attached.
		 */
		if (mtu >= ifr->ifr_mtu) {
			mtu = ifr->ifr_mtu;
		} else {
			if_printf(ifp, "fixup mtu %d -> %u\n",
			    ifr->ifr_mtu, mtu);
		}
		ifp->if_mtu = mtu;

		/*
		 * Synthetic parts' reattach may change the chimney
		 * sending size; update it.
		 */
		if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax)
			hn_set_chim_size(sc, sc->hn_chim_szmax);

		/*
		 * Make sure that various parameters based on MTU are
		 * still valid, after the MTU change.
		 */
		hn_mtu_change_fixup(sc);

		/*
		 * All done!  Resume the interface now.
		 */
		hn_resume(sc);

		if ((sc->hn_flags & HN_FLAG_RXVF) ||
		    (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
			/*
			 * Since we have reattached the NVS part,
			 * change the datapath to VF again, in case
			 * it was lost while the NVS was detached.
			 */
			hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF);
		}

		HN_UNLOCK(sc);
		break;

	case SIOCSIFFLAGS:
		HN_LOCK(sc);

		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
			HN_UNLOCK(sc);
			break;
		}

		if (hn_xpnt_vf_isready(sc))
			hn_xpnt_vf_saveifflags(sc);

		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				/*
				 * Caller might hold a mutex, e.g.
				 * bpf; use busy-wait for the RNDIS
				 * reply.
				 */
				HN_NO_SLEEPING(sc);
				hn_rxfilter_config(sc);
				HN_SLEEPING_OK(sc);

				if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
					error = hn_xpnt_vf_iocsetflags(sc);
			} else {
				hn_init_locked(sc);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				hn_stop(sc, false);
		}
		sc->hn_if_flags = ifp->if_flags;

		HN_UNLOCK(sc);
		break;

	case SIOCSIFCAP:
		HN_LOCK(sc);

		if (hn_xpnt_vf_isready(sc)) {
			ifr_vf = *ifr;
			strlcpy(ifr_vf.ifr_name, sc->hn_vf_ifp->if_xname,
			    sizeof(ifr_vf.ifr_name));
			error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf);
			HN_UNLOCK(sc);
			break;
		}

		/*
		 * Fix up requested capabilities w/ supported capabilities,
		 * since the supported capabilities could have been changed.
		 */
		mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^
		    ifp->if_capenable;

		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			if (ifp->if_capenable & IFCAP_TXCSUM)
				ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc);
			else
				ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc);
		}
		if (mask & IFCAP_TXCSUM_IPV6) {
			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
			if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
				ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc);
			else
				ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc);
		}

		/* TODO: flip RNDIS offload parameters for RXCSUM. */
		if (mask & IFCAP_RXCSUM)
			ifp->if_capenable ^= IFCAP_RXCSUM;
#ifdef foo
		/* We can't diff IPv6 packets from IPv4 packets on RX path.
*/ 3927 if (mask & IFCAP_RXCSUM_IPV6) 3928 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; 3929#endif 3930 3931 if (mask & IFCAP_LRO) 3932 ifp->if_capenable ^= IFCAP_LRO; 3933 3934 if (mask & IFCAP_TSO4) { 3935 ifp->if_capenable ^= IFCAP_TSO4; 3936 if (ifp->if_capenable & IFCAP_TSO4) 3937 ifp->if_hwassist |= CSUM_IP_TSO; 3938 else 3939 ifp->if_hwassist &= ~CSUM_IP_TSO; 3940 } 3941 if (mask & IFCAP_TSO6) { 3942 ifp->if_capenable ^= IFCAP_TSO6; 3943 if (ifp->if_capenable & IFCAP_TSO6) 3944 ifp->if_hwassist |= CSUM_IP6_TSO; 3945 else 3946 ifp->if_hwassist &= ~CSUM_IP6_TSO; 3947 } 3948 3949 HN_UNLOCK(sc); 3950 break; 3951 3952 case SIOCADDMULTI: 3953 case SIOCDELMULTI: 3954 HN_LOCK(sc); 3955 3956 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3957 HN_UNLOCK(sc); 3958 break; 3959 } 3960 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3961 /* 3962 * Multicast uses mutex; use busy-wait for 3963 * the RNDIS reply. 3964 */ 3965 HN_NO_SLEEPING(sc); 3966 hn_rxfilter_config(sc); 3967 HN_SLEEPING_OK(sc); 3968 } 3969 3970 /* XXX vlan(4) style mcast addr maintenance */ 3971 if (hn_xpnt_vf_isready(sc)) { 3972 int old_if_flags; 3973 3974 old_if_flags = sc->hn_vf_ifp->if_flags; 3975 hn_xpnt_vf_saveifflags(sc); 3976 3977 if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) && 3978 ((old_if_flags ^ sc->hn_vf_ifp->if_flags) & 3979 IFF_ALLMULTI)) 3980 error = hn_xpnt_vf_iocsetflags(sc); 3981 } 3982 3983 HN_UNLOCK(sc); 3984 break; 3985 3986 case SIOCSIFMEDIA: 3987 case SIOCGIFMEDIA: 3988 HN_LOCK(sc); 3989 if (hn_xpnt_vf_isready(sc)) { 3990 /* 3991 * SIOCGIFMEDIA expects ifmediareq, so don't 3992 * create and pass ifr_vf to the VF here; just 3993 * replace the ifr_name. 3994 */ 3995 vf_ifp = sc->hn_vf_ifp; 3996 strlcpy(ifr->ifr_name, vf_ifp->if_xname, 3997 sizeof(ifr->ifr_name)); 3998 error = vf_ifp->if_ioctl(vf_ifp, cmd, data); 3999 /* Restore the ifr_name. */ 4000 strlcpy(ifr->ifr_name, ifp->if_xname, 4001 sizeof(ifr->ifr_name)); 4002 HN_UNLOCK(sc); 4003 break; 4004 } 4005 HN_UNLOCK(sc); 4006 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); 4007 break; 4008 4009 case SIOCGIFRSSHASH: 4010 ifrh = (struct ifrsshash *)data; 4011 HN_LOCK(sc); 4012 if (sc->hn_rx_ring_inuse == 1) { 4013 HN_UNLOCK(sc); 4014 ifrh->ifrh_func = RSS_FUNC_NONE; 4015 ifrh->ifrh_types = 0; 4016 break; 4017 } 4018 4019 if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) 4020 ifrh->ifrh_func = RSS_FUNC_TOEPLITZ; 4021 else 4022 ifrh->ifrh_func = RSS_FUNC_PRIVATE; 4023 ifrh->ifrh_types = hn_rss_type_fromndis(sc->hn_rss_hash); 4024 HN_UNLOCK(sc); 4025 break; 4026 4027 case SIOCGIFRSSKEY: 4028 ifrk = (struct ifrsskey *)data; 4029 HN_LOCK(sc); 4030 if (sc->hn_rx_ring_inuse == 1) { 4031 HN_UNLOCK(sc); 4032 ifrk->ifrk_func = RSS_FUNC_NONE; 4033 ifrk->ifrk_keylen = 0; 4034 break; 4035 } 4036 if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) 4037 ifrk->ifrk_func = RSS_FUNC_TOEPLITZ; 4038 else 4039 ifrk->ifrk_func = RSS_FUNC_PRIVATE; 4040 ifrk->ifrk_keylen = NDIS_HASH_KEYSIZE_TOEPLITZ; 4041 memcpy(ifrk->ifrk_key, sc->hn_rss.rss_key, 4042 NDIS_HASH_KEYSIZE_TOEPLITZ); 4043 HN_UNLOCK(sc); 4044 break; 4045 4046 default: 4047 error = ether_ioctl(ifp, cmd, data); 4048 break; 4049 } 4050 return (error); 4051} 4052 4053static void 4054hn_stop(struct hn_softc *sc, bool detaching) 4055{ 4056 struct ifnet *ifp = sc->hn_ifp; 4057 int i; 4058 4059 HN_LOCK_ASSERT(sc); 4060 4061 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 4062 ("synthetic parts were not attached")); 4063 4064 /* Clear RUNNING bit ASAP. 
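	 *
	 * Clearing IFF_DRV_RUNNING first closes the hn_transmit()/
	 * hn_start() entry points, so no new packets can be queued
	 * while the data path is suspended below.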
*/ 4065 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 4066 4067 /* Disable polling. */ 4068 hn_polling(sc, 0); 4069 4070 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { 4071 KASSERT(sc->hn_vf_ifp != NULL, 4072 ("%s: VF is not attached", ifp->if_xname)); 4073 4074 /* Mark transparent mode VF as disabled. */ 4075 hn_xpnt_vf_setdisable(sc, false /* keep hn_vf_ifp */); 4076 4077 /* 4078 * NOTE: 4079 * Datapath setting must happen _before_ bringing 4080 * the VF down. 4081 */ 4082 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); 4083 4084 /* 4085 * Bring the VF down. 4086 */ 4087 hn_xpnt_vf_saveifflags(sc); 4088 sc->hn_vf_ifp->if_flags &= ~IFF_UP; 4089 hn_xpnt_vf_iocsetflags(sc); 4090 } 4091 4092 /* Suspend data transfers. */ 4093 hn_suspend_data(sc); 4094 4095 /* Clear OACTIVE bit. */ 4096 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4097 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4098 sc->hn_tx_ring[i].hn_oactive = 0; 4099 4100 /* 4101 * If the non-transparent mode VF is active, make sure 4102 * that the RX filter still allows packet reception. 4103 */ 4104 if (!detaching && (sc->hn_flags & HN_FLAG_RXVF)) 4105 hn_rxfilter_config(sc); 4106} 4107 4108static void 4109hn_init_locked(struct hn_softc *sc) 4110{ 4111 struct ifnet *ifp = sc->hn_ifp; 4112 int i; 4113 4114 HN_LOCK_ASSERT(sc); 4115 4116 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 4117 return; 4118 4119 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 4120 return; 4121 4122 /* Configure RX filter */ 4123 hn_rxfilter_config(sc); 4124 4125 /* Clear OACTIVE bit. */ 4126 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4127 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4128 sc->hn_tx_ring[i].hn_oactive = 0; 4129 4130 /* Clear TX 'suspended' bit. */ 4131 hn_resume_tx(sc, sc->hn_tx_ring_inuse); 4132 4133 if (hn_xpnt_vf_isready(sc)) { 4134 /* Initialize transparent VF. */ 4135 hn_xpnt_vf_init(sc); 4136 } 4137 4138 /* Everything is ready; unleash! */ 4139 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 4140 4141 /* Re-enable polling if requested. */ 4142 if (sc->hn_pollhz > 0) 4143 hn_polling(sc, sc->hn_pollhz); 4144} 4145 4146static void 4147hn_init(void *xsc) 4148{ 4149 struct hn_softc *sc = xsc; 4150 4151 HN_LOCK(sc); 4152 hn_init_locked(sc); 4153 HN_UNLOCK(sc); 4154} 4155 4156#if __FreeBSD_version >= 1100099 4157 4158static int 4159hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) 4160{ 4161 struct hn_softc *sc = arg1; 4162 unsigned int lenlim; 4163 int error; 4164 4165 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; 4166 error = sysctl_handle_int(oidp, &lenlim, 0, req); 4167 if (error || req->newptr == NULL) 4168 return error; 4169 4170 HN_LOCK(sc); 4171 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || 4172 lenlim > TCP_LRO_LENGTH_MAX) { 4173 HN_UNLOCK(sc); 4174 return EINVAL; 4175 } 4176 hn_set_lro_lenlim(sc, lenlim); 4177 HN_UNLOCK(sc); 4178 4179 return 0; 4180} 4181 4182static int 4183hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) 4184{ 4185 struct hn_softc *sc = arg1; 4186 int ackcnt, error, i; 4187 4188 /* 4189 * lro_ackcnt_lim is append count limit, 4190 * +1 to turn it into aggregation limit. 4191 */ 4192 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; 4193 error = sysctl_handle_int(oidp, &ackcnt, 0, req); 4194 if (error || req->newptr == NULL) 4195 return error; 4196 4197 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) 4198 return EINVAL; 4199 4200 /* 4201 * Convert aggregation limit back to append 4202 * count limit. 
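	 * E.g. a user-visible limit of 2 corresponds to a stored
	 * lro_ackcnt_lim of 1: one ACK is appended to the first
	 * before the pair is flushed.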
4203 */ 4204 --ackcnt; 4205 HN_LOCK(sc); 4206 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 4207 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; 4208 HN_UNLOCK(sc); 4209 return 0; 4210} 4211 4212#endif 4213 4214static int 4215hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) 4216{ 4217 struct hn_softc *sc = arg1; 4218 int hcsum = arg2; 4219 int on, error, i; 4220 4221 on = 0; 4222 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) 4223 on = 1; 4224 4225 error = sysctl_handle_int(oidp, &on, 0, req); 4226 if (error || req->newptr == NULL) 4227 return error; 4228 4229 HN_LOCK(sc); 4230 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4231 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4232 4233 if (on) 4234 rxr->hn_trust_hcsum |= hcsum; 4235 else 4236 rxr->hn_trust_hcsum &= ~hcsum; 4237 } 4238 HN_UNLOCK(sc); 4239 return 0; 4240} 4241 4242static int 4243hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) 4244{ 4245 struct hn_softc *sc = arg1; 4246 int chim_size, error; 4247 4248 chim_size = sc->hn_tx_ring[0].hn_chim_size; 4249 error = sysctl_handle_int(oidp, &chim_size, 0, req); 4250 if (error || req->newptr == NULL) 4251 return error; 4252 4253 if (chim_size > sc->hn_chim_szmax || chim_size <= 0) 4254 return EINVAL; 4255 4256 HN_LOCK(sc); 4257 hn_set_chim_size(sc, chim_size); 4258 HN_UNLOCK(sc); 4259 return 0; 4260} 4261 4262#if __FreeBSD_version < 1100095 4263static int 4264hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) 4265{ 4266 struct hn_softc *sc = arg1; 4267 int ofs = arg2, i, error; 4268 struct hn_rx_ring *rxr; 4269 uint64_t stat; 4270 4271 stat = 0; 4272 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4273 rxr = &sc->hn_rx_ring[i]; 4274 stat += *((int *)((uint8_t *)rxr + ofs)); 4275 } 4276 4277 error = sysctl_handle_64(oidp, &stat, 0, req); 4278 if (error || req->newptr == NULL) 4279 return error; 4280 4281 /* Zero out this stat. */ 4282 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4283 rxr = &sc->hn_rx_ring[i]; 4284 *((int *)((uint8_t *)rxr + ofs)) = 0; 4285 } 4286 return 0; 4287} 4288#else 4289static int 4290hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) 4291{ 4292 struct hn_softc *sc = arg1; 4293 int ofs = arg2, i, error; 4294 struct hn_rx_ring *rxr; 4295 uint64_t stat; 4296 4297 stat = 0; 4298 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4299 rxr = &sc->hn_rx_ring[i]; 4300 stat += *((uint64_t *)((uint8_t *)rxr + ofs)); 4301 } 4302 4303 error = sysctl_handle_64(oidp, &stat, 0, req); 4304 if (error || req->newptr == NULL) 4305 return error; 4306 4307 /* Zero out this stat. */ 4308 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4309 rxr = &sc->hn_rx_ring[i]; 4310 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; 4311 } 4312 return 0; 4313} 4314 4315#endif 4316 4317static int 4318hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 4319{ 4320 struct hn_softc *sc = arg1; 4321 int ofs = arg2, i, error; 4322 struct hn_rx_ring *rxr; 4323 u_long stat; 4324 4325 stat = 0; 4326 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4327 rxr = &sc->hn_rx_ring[i]; 4328 stat += *((u_long *)((uint8_t *)rxr + ofs)); 4329 } 4330 4331 error = sysctl_handle_long(oidp, &stat, 0, req); 4332 if (error || req->newptr == NULL) 4333 return error; 4334 4335 /* Zero out this stat. 
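	 * Reads sum the per-ring counters; writing any value resets
	 * them, e.g. (illustrative): sysctl dev.hn.0.csum_tcp=0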
*/ 4336 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4337 rxr = &sc->hn_rx_ring[i]; 4338 *((u_long *)((uint8_t *)rxr + ofs)) = 0; 4339 } 4340 return 0; 4341} 4342 4343static int 4344hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 4345{ 4346 struct hn_softc *sc = arg1; 4347 int ofs = arg2, i, error; 4348 struct hn_tx_ring *txr; 4349 u_long stat; 4350 4351 stat = 0; 4352 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4353 txr = &sc->hn_tx_ring[i]; 4354 stat += *((u_long *)((uint8_t *)txr + ofs)); 4355 } 4356 4357 error = sysctl_handle_long(oidp, &stat, 0, req); 4358 if (error || req->newptr == NULL) 4359 return error; 4360 4361 /* Zero out this stat. */ 4362 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4363 txr = &sc->hn_tx_ring[i]; 4364 *((u_long *)((uint8_t *)txr + ofs)) = 0; 4365 } 4366 return 0; 4367} 4368 4369static int 4370hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) 4371{ 4372 struct hn_softc *sc = arg1; 4373 int ofs = arg2, i, error, conf; 4374 struct hn_tx_ring *txr; 4375 4376 txr = &sc->hn_tx_ring[0]; 4377 conf = *((int *)((uint8_t *)txr + ofs)); 4378 4379 error = sysctl_handle_int(oidp, &conf, 0, req); 4380 if (error || req->newptr == NULL) 4381 return error; 4382 4383 HN_LOCK(sc); 4384 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4385 txr = &sc->hn_tx_ring[i]; 4386 *((int *)((uint8_t *)txr + ofs)) = conf; 4387 } 4388 HN_UNLOCK(sc); 4389 4390 return 0; 4391} 4392 4393static int 4394hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS) 4395{ 4396 struct hn_softc *sc = arg1; 4397 int error, size; 4398 4399 size = sc->hn_agg_size; 4400 error = sysctl_handle_int(oidp, &size, 0, req); 4401 if (error || req->newptr == NULL) 4402 return (error); 4403 4404 HN_LOCK(sc); 4405 sc->hn_agg_size = size; 4406 hn_set_txagg(sc); 4407 HN_UNLOCK(sc); 4408 4409 return (0); 4410} 4411 4412static int 4413hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS) 4414{ 4415 struct hn_softc *sc = arg1; 4416 int error, pkts; 4417 4418 pkts = sc->hn_agg_pkts; 4419 error = sysctl_handle_int(oidp, &pkts, 0, req); 4420 if (error || req->newptr == NULL) 4421 return (error); 4422 4423 HN_LOCK(sc); 4424 sc->hn_agg_pkts = pkts; 4425 hn_set_txagg(sc); 4426 HN_UNLOCK(sc); 4427 4428 return (0); 4429} 4430 4431static int 4432hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS) 4433{ 4434 struct hn_softc *sc = arg1; 4435 int pkts; 4436 4437 pkts = sc->hn_tx_ring[0].hn_agg_pktmax; 4438 return (sysctl_handle_int(oidp, &pkts, 0, req)); 4439} 4440 4441static int 4442hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS) 4443{ 4444 struct hn_softc *sc = arg1; 4445 int align; 4446 4447 align = sc->hn_tx_ring[0].hn_agg_align; 4448 return (sysctl_handle_int(oidp, &align, 0, req)); 4449} 4450 4451static void 4452hn_chan_polling(struct vmbus_channel *chan, u_int pollhz) 4453{ 4454 if (pollhz == 0) 4455 vmbus_chan_poll_disable(chan); 4456 else 4457 vmbus_chan_poll_enable(chan, pollhz); 4458} 4459 4460static void 4461hn_polling(struct hn_softc *sc, u_int pollhz) 4462{ 4463 int nsubch = sc->hn_rx_ring_inuse - 1; 4464 4465 HN_LOCK_ASSERT(sc); 4466 4467 if (nsubch > 0) { 4468 struct vmbus_channel **subch; 4469 int i; 4470 4471 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 4472 for (i = 0; i < nsubch; ++i) 4473 hn_chan_polling(subch[i], pollhz); 4474 vmbus_subchan_rel(subch, nsubch); 4475 } 4476 hn_chan_polling(sc->hn_prichan, pollhz); 4477} 4478 4479static int 4480hn_polling_sysctl(SYSCTL_HANDLER_ARGS) 4481{ 4482 struct hn_softc *sc = arg1; 4483 int pollhz, error; 4484 4485 pollhz = sc->hn_pollhz; 4486 error = sysctl_handle_int(oidp, &pollhz, 0, req); 4487 if (error || req->newptr == NULL) 
4488 return (error); 4489 4490 if (pollhz != 0 && 4491 (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX)) 4492 return (EINVAL); 4493 4494 HN_LOCK(sc); 4495 if (sc->hn_pollhz != pollhz) { 4496 sc->hn_pollhz = pollhz; 4497 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && 4498 (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 4499 hn_polling(sc, sc->hn_pollhz); 4500 } 4501 HN_UNLOCK(sc); 4502 4503 return (0); 4504} 4505 4506static int 4507hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) 4508{ 4509 struct hn_softc *sc = arg1; 4510 char verstr[16]; 4511 4512 snprintf(verstr, sizeof(verstr), "%u.%u", 4513 HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), 4514 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); 4515 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); 4516} 4517 4518static int 4519hn_caps_sysctl(SYSCTL_HANDLER_ARGS) 4520{ 4521 struct hn_softc *sc = arg1; 4522 char caps_str[128]; 4523 uint32_t caps; 4524 4525 HN_LOCK(sc); 4526 caps = sc->hn_caps; 4527 HN_UNLOCK(sc); 4528 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); 4529 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); 4530} 4531 4532static int 4533hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) 4534{ 4535 struct hn_softc *sc = arg1; 4536 char assist_str[128]; 4537 uint32_t hwassist; 4538 4539 HN_LOCK(sc); 4540 hwassist = sc->hn_ifp->if_hwassist; 4541 HN_UNLOCK(sc); 4542 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); 4543 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); 4544} 4545 4546static int 4547hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) 4548{ 4549 struct hn_softc *sc = arg1; 4550 char filter_str[128]; 4551 uint32_t filter; 4552 4553 HN_LOCK(sc); 4554 filter = sc->hn_rx_filter; 4555 HN_UNLOCK(sc); 4556 snprintf(filter_str, sizeof(filter_str), "%b", filter, 4557 NDIS_PACKET_TYPES); 4558 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); 4559} 4560 4561#ifndef RSS 4562 4563static int 4564hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) 4565{ 4566 struct hn_softc *sc = arg1; 4567 int error; 4568 4569 HN_LOCK(sc); 4570 4571 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4572 if (error || req->newptr == NULL) 4573 goto back; 4574 4575 if ((sc->hn_flags & HN_FLAG_RXVF) || 4576 (hn_xpnt_vf && sc->hn_vf_ifp != NULL)) { 4577 /* 4578 * RSS key is synchronized w/ VF's, don't allow users 4579 * to change it. 4580 */ 4581 error = EBUSY; 4582 goto back; 4583 } 4584 4585 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4586 if (error) 4587 goto back; 4588 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 4589 4590 if (sc->hn_rx_ring_inuse > 1) { 4591 error = hn_rss_reconfig(sc); 4592 } else { 4593 /* Not RSS capable, at least for now; just save the RSS key. */ 4594 error = 0; 4595 } 4596back: 4597 HN_UNLOCK(sc); 4598 return (error); 4599} 4600 4601static int 4602hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) 4603{ 4604 struct hn_softc *sc = arg1; 4605 int error; 4606 4607 HN_LOCK(sc); 4608 4609 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4610 if (error || req->newptr == NULL) 4611 goto back; 4612 4613 /* 4614 * Don't allow RSS indirect table change, if this interface is not 4615 * RSS capable currently. 
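	 * (hn_rx_ring_inuse == 1 means RSS is effectively off.)  A new
	 * table is sanitized by hn_rss_ind_fixup() before
	 * hn_rss_reconfig() pushes it to the device.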
4616 */ 4617 if (sc->hn_rx_ring_inuse == 1) { 4618 error = EOPNOTSUPP; 4619 goto back; 4620 } 4621 4622 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4623 if (error) 4624 goto back; 4625 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 4626 4627 hn_rss_ind_fixup(sc); 4628 error = hn_rss_reconfig(sc); 4629back: 4630 HN_UNLOCK(sc); 4631 return (error); 4632} 4633 4634#endif /* !RSS */ 4635 4636static int 4637hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) 4638{ 4639 struct hn_softc *sc = arg1; 4640 char hash_str[128]; 4641 uint32_t hash; 4642 4643 HN_LOCK(sc); 4644 hash = sc->hn_rss_hash; 4645 HN_UNLOCK(sc); 4646 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4647 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4648} 4649 4650static int 4651hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS) 4652{ 4653 struct hn_softc *sc = arg1; 4654 char hash_str[128]; 4655 uint32_t hash; 4656 4657 HN_LOCK(sc); 4658 hash = sc->hn_rss_hcap; 4659 HN_UNLOCK(sc); 4660 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4661 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4662} 4663 4664static int 4665hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS) 4666{ 4667 struct hn_softc *sc = arg1; 4668 char hash_str[128]; 4669 uint32_t hash; 4670 4671 HN_LOCK(sc); 4672 hash = sc->hn_rx_ring[0].hn_mbuf_hash; 4673 HN_UNLOCK(sc); 4674 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4675 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4676} 4677 4678static int 4679hn_vf_sysctl(SYSCTL_HANDLER_ARGS) 4680{ 4681 struct hn_softc *sc = arg1; 4682 char vf_name[IFNAMSIZ + 1]; 4683 struct ifnet *vf_ifp; 4684 4685 HN_LOCK(sc); 4686 vf_name[0] = '\0'; 4687 vf_ifp = sc->hn_vf_ifp; 4688 if (vf_ifp != NULL) 4689 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4690 HN_UNLOCK(sc); 4691 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4692} 4693 4694static int 4695hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS) 4696{ 4697 struct hn_softc *sc = arg1; 4698 char vf_name[IFNAMSIZ + 1]; 4699 struct ifnet *vf_ifp; 4700 4701 HN_LOCK(sc); 4702 vf_name[0] = '\0'; 4703 vf_ifp = sc->hn_rx_ring[0].hn_rxvf_ifp; 4704 if (vf_ifp != NULL) 4705 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4706 HN_UNLOCK(sc); 4707 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4708} 4709 4710static int 4711hn_vflist_sysctl(SYSCTL_HANDLER_ARGS) 4712{ 4713 struct rm_priotracker pt; 4714 struct sbuf *sb; 4715 int error, i; 4716 bool first; 4717 4718 error = sysctl_wire_old_buffer(req, 0); 4719 if (error != 0) 4720 return (error); 4721 4722 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); 4723 if (sb == NULL) 4724 return (ENOMEM); 4725 4726 rm_rlock(&hn_vfmap_lock, &pt); 4727 4728 first = true; 4729 for (i = 0; i < hn_vfmap_size; ++i) { 4730 struct ifnet *ifp; 4731 4732 if (hn_vfmap[i] == NULL) 4733 continue; 4734 4735 ifp = ifnet_byindex(i); 4736 if (ifp != NULL) { 4737 if (first) 4738 sbuf_printf(sb, "%s", ifp->if_xname); 4739 else 4740 sbuf_printf(sb, " %s", ifp->if_xname); 4741 first = false; 4742 } 4743 } 4744 4745 rm_runlock(&hn_vfmap_lock, &pt); 4746 4747 error = sbuf_finish(sb); 4748 sbuf_delete(sb); 4749 return (error); 4750} 4751 4752static int 4753hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS) 4754{ 4755 struct rm_priotracker pt; 4756 struct sbuf *sb; 4757 int error, i; 4758 bool first; 4759 4760 error = sysctl_wire_old_buffer(req, 0); 4761 if (error != 0) 4762 return (error); 4763 4764 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); 
	if (sb == NULL)
		return (ENOMEM);

	rm_rlock(&hn_vfmap_lock, &pt);

	first = true;
	for (i = 0; i < hn_vfmap_size; ++i) {
		struct ifnet *ifp, *hn_ifp;

		hn_ifp = hn_vfmap[i];
		if (hn_ifp == NULL)
			continue;

		ifp = ifnet_byindex(i);
		if (ifp != NULL) {
			if (first) {
				sbuf_printf(sb, "%s:%s", ifp->if_xname,
				    hn_ifp->if_xname);
			} else {
				sbuf_printf(sb, " %s:%s", ifp->if_xname,
				    hn_ifp->if_xname);
			}
			first = false;
		}
	}

	rm_runlock(&hn_vfmap_lock, &pt);

	error = sbuf_finish(sb);
	sbuf_delete(sb);
	return (error);
}

static int
hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	int error, onoff = 0;

	if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF)
		onoff = 1;
	error = sysctl_handle_int(oidp, &onoff, 0, req);
	if (error || req->newptr == NULL)
		return (error);

	HN_LOCK(sc);
	/* NOTE: hn_vf_lock for hn_transmit() */
	rm_wlock(&sc->hn_vf_lock);
	if (onoff)
		sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF;
	else
		sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF;
	rm_wunlock(&sc->hn_vf_lock);
	HN_UNLOCK(sc);

	return (0);
}

static int
hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	int enabled = 0;

	if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
		enabled = 1;
	return (sysctl_handle_int(oidp, &enabled, 0, req));
}

static int
hn_check_iplen(const struct mbuf *m, int hoff)
{
	const struct ip *ip;
	int len, iphlen, iplen;
	const struct tcphdr *th;
	int thoff;		/* TCP data offset */

	len = hoff + sizeof(struct ip);

	/* The packet must be at least the size of an IP header. */
	if (m->m_pkthdr.len < len)
		return IPPROTO_DONE;

	/* The fixed IP header must reside completely in the first mbuf. */
	if (m->m_len < len)
		return IPPROTO_DONE;

	ip = mtodo(m, hoff);

	/* Bound check the packet's stated IP header length. */
	iphlen = ip->ip_hl << 2;
	if (iphlen < sizeof(struct ip))	/* minimum header length */
		return IPPROTO_DONE;

	/* The full IP header must reside completely in the first mbuf. */
	if (m->m_len < hoff + iphlen)
		return IPPROTO_DONE;

	iplen = ntohs(ip->ip_len);

	/*
	 * Check that the amount of data in the buffers is at least
	 * as much as the IP header would have us expect.
	 */
	if (m->m_pkthdr.len < hoff + iplen)
		return IPPROTO_DONE;

	/*
	 * Ignore IP fragments.
	 */
	if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
		return IPPROTO_DONE;

	/*
	 * The TCP/IP or UDP/IP header must be entirely contained within
	 * the first fragment of a packet.
	 */
	switch (ip->ip_p) {
	case IPPROTO_TCP:
		if (iplen < iphlen + sizeof(struct tcphdr))
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
			return IPPROTO_DONE;
		th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
		thoff = th->th_off << 2;
		if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + thoff)
			return IPPROTO_DONE;
		break;
	case IPPROTO_UDP:
		if (iplen < iphlen + sizeof(struct udphdr))
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
			return IPPROTO_DONE;
		break;
	default:
		if (iplen < iphlen)
			return IPPROTO_DONE;
		break;
	}
	return ip->ip_p;
}

static void
hn_rxpkt_proto(const struct mbuf *m_new, int *l3proto, int *l4proto)
{
	const struct ether_header *eh;
	uint16_t etype;
	int hoff;

	hoff = sizeof(*eh);
	/* Checked at the beginning of this function. */
	KASSERT(m_new->m_len >= hoff, ("not ethernet frame"));

	eh = mtod(m_new, const struct ether_header *);
	etype = ntohs(eh->ether_type);
	if (etype == ETHERTYPE_VLAN) {
		const struct ether_vlan_header *evl;

		hoff = sizeof(*evl);
		if (m_new->m_len < hoff)
			return;
		evl = mtod(m_new, const struct ether_vlan_header *);
		etype = ntohs(evl->evl_proto);
	}
	*l3proto = etype;

	if (etype == ETHERTYPE_IP)
		*l4proto = hn_check_iplen(m_new, hoff);
	else
		*l4proto = IPPROTO_DONE;
}

static int
hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
{
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;
	device_t dev = sc->hn_dev;
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
	int lroent_cnt;
#endif
#endif
	int i;

	/*
	 * Create RXBUF for reception.
	 *
	 * NOTE:
	 * - It is shared by all channels.
	 * - A large enough buffer is allocated; certain versions of NVS
	 *   may further limit the usable space.
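	 * - Every RX ring created below points back at it via
	 *   rxr->hn_rxbuf.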
4959 */ 4960 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4961 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, 4962 BUS_DMA_WAITOK | BUS_DMA_ZERO); 4963 if (sc->hn_rxbuf == NULL) { 4964 device_printf(sc->hn_dev, "allocate rxbuf failed\n"); 4965 return (ENOMEM); 4966 } 4967 4968 sc->hn_rx_ring_cnt = ring_cnt; 4969 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; 4970 4971 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, 4972 M_DEVBUF, M_WAITOK | M_ZERO); 4973 4974#if defined(INET) || defined(INET6) 4975#if __FreeBSD_version >= 1100095 4976 lroent_cnt = hn_lro_entry_count; 4977 if (lroent_cnt < TCP_LRO_ENTRIES) 4978 lroent_cnt = TCP_LRO_ENTRIES; 4979 if (bootverbose) 4980 device_printf(dev, "LRO: entry count %d\n", lroent_cnt); 4981#endif 4982#endif /* INET || INET6 */ 4983 4984 ctx = device_get_sysctl_ctx(dev); 4985 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 4986 4987 /* Create dev.hn.UNIT.rx sysctl tree */ 4988 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", 4989 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4990 4991 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4992 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4993 4994 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4995 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, 4996 &rxr->hn_br_dma, BUS_DMA_WAITOK); 4997 if (rxr->hn_br == NULL) { 4998 device_printf(dev, "allocate bufring failed\n"); 4999 return (ENOMEM); 5000 } 5001 5002 if (hn_trust_hosttcp) 5003 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; 5004 if (hn_trust_hostudp) 5005 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; 5006 if (hn_trust_hostip) 5007 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; 5008 rxr->hn_mbuf_hash = NDIS_HASH_ALL; 5009 rxr->hn_ifp = sc->hn_ifp; 5010 if (i < sc->hn_tx_ring_cnt) 5011 rxr->hn_txr = &sc->hn_tx_ring[i]; 5012 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; 5013 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); 5014 rxr->hn_rx_idx = i; 5015 rxr->hn_rxbuf = sc->hn_rxbuf; 5016 5017 /* 5018 * Initialize LRO. 
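		 * On __FreeBSD_version >= 1100095 tcp_lro_init_args() sizes
		 * the control block from the tunables (LRO entry count and
		 * mbuf queue depth); older stacks get tcp_lro_init() plus
		 * an ifp back-pointer only.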
		 */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
		tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt,
		    hn_lro_mbufq_depth);
#else
		tcp_lro_init(&rxr->hn_lro);
		rxr->hn_lro.ifp = sc->hn_ifp;
#endif
#if __FreeBSD_version >= 1100099
		rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF;
		rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
#endif
#endif	/* INET || INET6 */

		if (sc->hn_rx_sysctl_tree != NULL) {
			char name[16];

			/*
			 * Create per RX ring sysctl tree:
			 * dev.hn.UNIT.rx.RINGID
			 */
			snprintf(name, sizeof(name), "%d", i);
			rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
			    SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
			    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

			if (rxr->hn_rx_sysctl_tree != NULL) {
				SYSCTL_ADD_ULONG(ctx,
				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
				    OID_AUTO, "packets", CTLFLAG_RW,
				    &rxr->hn_pkts, "# of packets received");
				SYSCTL_ADD_ULONG(ctx,
				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
				    OID_AUTO, "rss_pkts", CTLFLAG_RW,
				    &rxr->hn_rss_pkts,
				    "# of packets w/ RSS info received");
				SYSCTL_ADD_ULONG(ctx,
				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
				    OID_AUTO, "rsc_pkts", CTLFLAG_RW,
				    &rxr->hn_rsc_pkts,
				    "# of RSC packets received");
				SYSCTL_ADD_ULONG(ctx,
				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
				    OID_AUTO, "rsc_drop", CTLFLAG_RW,
				    &rxr->hn_rsc_drop,
				    "# of RSC fragments dropped");
				SYSCTL_ADD_INT(ctx,
				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
				    OID_AUTO, "pktbuf_len", CTLFLAG_RD,
				    &rxr->hn_pktbuf_len, 0,
				    "Temporary channel packet buffer length");
			}
		}
	}

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
	    CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_rx_ring, hn_lro.lro_queued),
#if __FreeBSD_version < 1100095
	    hn_rx_stat_int_sysctl,
#else
	    hn_rx_stat_u64_sysctl,
#endif
	    "LU", "LRO queued");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed",
	    CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_rx_ring, hn_lro.lro_flushed),
#if __FreeBSD_version < 1100095
	    hn_rx_stat_int_sysctl,
#else
	    hn_rx_stat_u64_sysctl,
#endif
	    "LU", "LRO flushed");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried",
	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_rx_ring, hn_lro_tried),
	    hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries");
#if __FreeBSD_version >= 1100099
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim",
	    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_lro_lenlim_sysctl, "IU",
	    "Max # of data bytes to be aggregated by LRO");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_lro_ackcnt_sysctl, "I",
	    "Max # of ACKs to be aggregated by LRO");
#endif
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP,
	    hn_trust_hcsum_sysctl, "I",
	    "Trust tcp segment verification on host side, "
	    "when csum info is missing");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP,
	    hn_trust_hcsum_sysctl, "I",
	    "Trust udp datagram verification on host side, "
	    "when csum info is missing");
	SYSCTL_ADD_PROC(ctx, child,
OID_AUTO, "trust_hostip", 5118 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, 5119 hn_trust_hcsum_sysctl, "I", 5120 "Trust ip packet verification on host side, " 5121 "when csum info is missing"); 5122 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", 5123 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5124 __offsetof(struct hn_rx_ring, hn_csum_ip), 5125 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); 5126 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", 5127 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5128 __offsetof(struct hn_rx_ring, hn_csum_tcp), 5129 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); 5130 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", 5131 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5132 __offsetof(struct hn_rx_ring, hn_csum_udp), 5133 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); 5134 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", 5135 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5136 __offsetof(struct hn_rx_ring, hn_csum_trusted), 5137 hn_rx_stat_ulong_sysctl, "LU", 5138 "# of packets that we trust host's csum verification"); 5139 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", 5140 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5141 __offsetof(struct hn_rx_ring, hn_small_pkts), 5142 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); 5143 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", 5144 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5145 __offsetof(struct hn_rx_ring, hn_ack_failed), 5146 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); 5147 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", 5148 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); 5149 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", 5150 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); 5151 5152 return (0); 5153} 5154 5155static void 5156hn_destroy_rx_data(struct hn_softc *sc) 5157{ 5158 int i; 5159 5160 if (sc->hn_rxbuf != NULL) { 5161 if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0) 5162 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); 5163 else 5164 device_printf(sc->hn_dev, "RXBUF is referenced\n"); 5165 sc->hn_rxbuf = NULL; 5166 } 5167 5168 if (sc->hn_rx_ring_cnt == 0) 5169 return; 5170 5171 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 5172 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 5173 5174 if (rxr->hn_br == NULL) 5175 continue; 5176 if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) { 5177 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); 5178 } else { 5179 device_printf(sc->hn_dev, 5180 "%dth channel bufring is referenced", i); 5181 } 5182 rxr->hn_br = NULL; 5183 5184#if defined(INET) || defined(INET6) 5185 tcp_lro_free(&rxr->hn_lro); 5186#endif 5187 free(rxr->hn_pktbuf, M_DEVBUF); 5188 } 5189 free(sc->hn_rx_ring, M_DEVBUF); 5190 sc->hn_rx_ring = NULL; 5191 5192 sc->hn_rx_ring_cnt = 0; 5193 sc->hn_rx_ring_inuse = 0; 5194} 5195 5196static int 5197hn_tx_ring_create(struct hn_softc *sc, int id) 5198{ 5199 struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; 5200 device_t dev = sc->hn_dev; 5201 bus_dma_tag_t parent_dtag; 5202 int error, i; 5203 5204 txr->hn_sc = sc; 5205 txr->hn_tx_idx = id; 5206 5207#ifndef HN_USE_TXDESC_BUFRING 5208 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); 5209#endif 5210 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); 5211 5212 txr->hn_txdesc_cnt = HN_TX_DESC_CNT; 5213 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, 5214 M_DEVBUF, M_WAITOK | M_ZERO); 5215#ifndef HN_USE_TXDESC_BUFRING 5216 SLIST_INIT(&txr->hn_txlist); 5217#else 5218 
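	/*
	 * With HN_USE_TXDESC_BUFRING the free txd list is a buf_ring
	 * instead of the spinlock-protected SLIST above.
	 */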
txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, 5219 M_WAITOK, &txr->hn_tx_lock); 5220#endif 5221 5222 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) { 5223 txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ( 5224 device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id)); 5225 } else { 5226 txr->hn_tx_taskq = sc->hn_tx_taskqs[id % hn_tx_taskq_cnt]; 5227 } 5228 5229#ifdef HN_IFSTART_SUPPORT 5230 if (hn_use_if_start) { 5231 txr->hn_txeof = hn_start_txeof; 5232 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); 5233 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); 5234 } else 5235#endif 5236 { 5237 int br_depth; 5238 5239 txr->hn_txeof = hn_xmit_txeof; 5240 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); 5241 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); 5242 5243 br_depth = hn_get_txswq_depth(txr); 5244 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, 5245 M_WAITOK, &txr->hn_tx_lock); 5246 } 5247 5248 txr->hn_direct_tx_size = hn_direct_tx_size; 5249 5250 /* 5251 * Always schedule transmission instead of trying to do direct 5252 * transmission. This one gives the best performance so far. 5253 */ 5254 txr->hn_sched_tx = 1; 5255 5256 parent_dtag = bus_get_dma_tag(dev); 5257 5258 /* DMA tag for RNDIS packet messages. */ 5259 error = bus_dma_tag_create(parent_dtag, /* parent */ 5260 HN_RNDIS_PKT_ALIGN, /* alignment */ 5261 HN_RNDIS_PKT_BOUNDARY, /* boundary */ 5262 BUS_SPACE_MAXADDR, /* lowaddr */ 5263 BUS_SPACE_MAXADDR, /* highaddr */ 5264 NULL, NULL, /* filter, filterarg */ 5265 HN_RNDIS_PKT_LEN, /* maxsize */ 5266 1, /* nsegments */ 5267 HN_RNDIS_PKT_LEN, /* maxsegsize */ 5268 0, /* flags */ 5269 NULL, /* lockfunc */ 5270 NULL, /* lockfuncarg */ 5271 &txr->hn_tx_rndis_dtag); 5272 if (error) { 5273 device_printf(dev, "failed to create rndis dmatag\n"); 5274 return error; 5275 } 5276 5277 /* DMA tag for data. */ 5278 error = bus_dma_tag_create(parent_dtag, /* parent */ 5279 1, /* alignment */ 5280 HN_TX_DATA_BOUNDARY, /* boundary */ 5281 BUS_SPACE_MAXADDR, /* lowaddr */ 5282 BUS_SPACE_MAXADDR, /* highaddr */ 5283 NULL, NULL, /* filter, filterarg */ 5284 HN_TX_DATA_MAXSIZE, /* maxsize */ 5285 HN_TX_DATA_SEGCNT_MAX, /* nsegments */ 5286 HN_TX_DATA_SEGSIZE, /* maxsegsize */ 5287 0, /* flags */ 5288 NULL, /* lockfunc */ 5289 NULL, /* lockfuncarg */ 5290 &txr->hn_tx_data_dtag); 5291 if (error) { 5292 device_printf(dev, "failed to create data dmatag\n"); 5293 return error; 5294 } 5295 5296 for (i = 0; i < txr->hn_txdesc_cnt; ++i) { 5297 struct hn_txdesc *txd = &txr->hn_txdesc[i]; 5298 5299 txd->txr = txr; 5300 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 5301 STAILQ_INIT(&txd->agg_list); 5302 5303 /* 5304 * Allocate and load RNDIS packet message. 5305 */ 5306 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, 5307 (void **)&txd->rndis_pkt, 5308 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, 5309 &txd->rndis_pkt_dmap); 5310 if (error) { 5311 device_printf(dev, 5312 "failed to allocate rndis_packet_msg, %d\n", i); 5313 return error; 5314 } 5315 5316 error = bus_dmamap_load(txr->hn_tx_rndis_dtag, 5317 txd->rndis_pkt_dmap, 5318 txd->rndis_pkt, HN_RNDIS_PKT_LEN, 5319 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, 5320 BUS_DMA_NOWAIT); 5321 if (error) { 5322 device_printf(dev, 5323 "failed to load rndis_packet_msg, %d\n", i); 5324 bus_dmamem_free(txr->hn_tx_rndis_dtag, 5325 txd->rndis_pkt, txd->rndis_pkt_dmap); 5326 return error; 5327 } 5328 5329 /* DMA map for TX data. 
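		 * Unlike the RNDIS message loaded just above, this map is
		 * created empty and is loaded/unloaded per packet by the
		 * TX path.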
*/ 5330 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, 5331 &txd->data_dmap); 5332 if (error) { 5333 device_printf(dev, 5334 "failed to allocate tx data dmamap\n"); 5335 bus_dmamap_unload(txr->hn_tx_rndis_dtag, 5336 txd->rndis_pkt_dmap); 5337 bus_dmamem_free(txr->hn_tx_rndis_dtag, 5338 txd->rndis_pkt, txd->rndis_pkt_dmap); 5339 return error; 5340 } 5341 5342 /* All set, put it to list */ 5343 txd->flags |= HN_TXD_FLAG_ONLIST; 5344#ifndef HN_USE_TXDESC_BUFRING 5345 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 5346#else 5347 buf_ring_enqueue(txr->hn_txdesc_br, txd); 5348#endif 5349 } 5350 txr->hn_txdesc_avail = txr->hn_txdesc_cnt; 5351 5352 if (sc->hn_tx_sysctl_tree != NULL) { 5353 struct sysctl_oid_list *child; 5354 struct sysctl_ctx_list *ctx; 5355 char name[16]; 5356 5357 /* 5358 * Create per TX ring sysctl tree: 5359 * dev.hn.UNIT.tx.RINGID 5360 */ 5361 ctx = device_get_sysctl_ctx(dev); 5362 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); 5363 5364 snprintf(name, sizeof(name), "%d", id); 5365 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 5366 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5367 5368 if (txr->hn_tx_sysctl_tree != NULL) { 5369 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); 5370 5371#ifdef HN_DEBUG 5372 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", 5373 CTLFLAG_RD, &txr->hn_txdesc_avail, 0, 5374 "# of available TX descs"); 5375#endif 5376#ifdef HN_IFSTART_SUPPORT 5377 if (!hn_use_if_start) 5378#endif 5379 { 5380 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", 5381 CTLFLAG_RD, &txr->hn_oactive, 0, 5382 "over active"); 5383 } 5384 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", 5385 CTLFLAG_RW, &txr->hn_pkts, 5386 "# of packets transmitted"); 5387 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends", 5388 CTLFLAG_RW, &txr->hn_sends, "# of sends"); 5389 } 5390 } 5391 5392 return 0; 5393} 5394 5395static void 5396hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) 5397{ 5398 struct hn_tx_ring *txr = txd->txr; 5399 5400 KASSERT(txd->m == NULL, ("still has mbuf installed")); 5401 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); 5402 5403 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); 5404 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, 5405 txd->rndis_pkt_dmap); 5406 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); 5407} 5408 5409static void 5410hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd) 5411{ 5412 5413 KASSERT(txd->refs == 0 || txd->refs == 1, 5414 ("invalid txd refs %d", txd->refs)); 5415 5416 /* Aggregated txds will be freed by their aggregating txd. */ 5417 if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) { 5418 int freed; 5419 5420 freed = hn_txdesc_put(txr, txd); 5421 KASSERT(freed, ("can't free txdesc")); 5422 } 5423} 5424 5425static void 5426hn_tx_ring_destroy(struct hn_tx_ring *txr) 5427{ 5428 int i; 5429 5430 if (txr->hn_txdesc == NULL) 5431 return; 5432 5433 /* 5434 * NOTE: 5435 * Because the freeing of aggregated txds will be deferred 5436 * to the aggregating txd, two passes are used here: 5437 * - The first pass GCes any pending txds. This GC is necessary, 5438 * since if the channels are revoked, hypervisor will not 5439 * deliver send-done for all pending txds. 5440 * - The second pass frees the busdma stuffs, i.e. after all txds 5441 * were freed. 
5442 */ 5443 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 5444 hn_txdesc_gc(txr, &txr->hn_txdesc[i]); 5445 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 5446 hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]); 5447 5448 if (txr->hn_tx_data_dtag != NULL) 5449 bus_dma_tag_destroy(txr->hn_tx_data_dtag); 5450 if (txr->hn_tx_rndis_dtag != NULL) 5451 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); 5452 5453#ifdef HN_USE_TXDESC_BUFRING 5454 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); 5455#endif 5456 5457 free(txr->hn_txdesc, M_DEVBUF); 5458 txr->hn_txdesc = NULL; 5459 5460 if (txr->hn_mbuf_br != NULL) 5461 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); 5462 5463#ifndef HN_USE_TXDESC_BUFRING 5464 mtx_destroy(&txr->hn_txlist_spin); 5465#endif 5466 mtx_destroy(&txr->hn_tx_lock); 5467} 5468 5469static int 5470hn_create_tx_data(struct hn_softc *sc, int ring_cnt) 5471{ 5472 struct sysctl_oid_list *child; 5473 struct sysctl_ctx_list *ctx; 5474 int i; 5475 5476 /* 5477 * Create TXBUF for chimney sending. 5478 * 5479 * NOTE: It is shared by all channels. 5480 */ 5481 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), 5482 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, 5483 BUS_DMA_WAITOK | BUS_DMA_ZERO); 5484 if (sc->hn_chim == NULL) { 5485 device_printf(sc->hn_dev, "allocate txbuf failed\n"); 5486 return (ENOMEM); 5487 } 5488 5489 sc->hn_tx_ring_cnt = ring_cnt; 5490 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 5491 5492 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, 5493 M_DEVBUF, M_WAITOK | M_ZERO); 5494 5495 ctx = device_get_sysctl_ctx(sc->hn_dev); 5496 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); 5497 5498 /* Create dev.hn.UNIT.tx sysctl tree */ 5499 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", 5500 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5501 5502 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 5503 int error; 5504 5505 error = hn_tx_ring_create(sc, i); 5506 if (error) 5507 return error; 5508 } 5509 5510 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", 5511 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5512 __offsetof(struct hn_tx_ring, hn_no_txdescs), 5513 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); 5514 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", 5515 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5516 __offsetof(struct hn_tx_ring, hn_send_failed), 5517 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); 5518 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", 5519 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5520 __offsetof(struct hn_tx_ring, hn_txdma_failed), 5521 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); 5522 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed", 5523 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5524 __offsetof(struct hn_tx_ring, hn_flush_failed), 5525 hn_tx_stat_ulong_sysctl, "LU", 5526 "# of packet transmission aggregation flush failure"); 5527 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", 5528 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5529 __offsetof(struct hn_tx_ring, hn_tx_collapsed), 5530 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); 5531 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", 5532 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5533 __offsetof(struct hn_tx_ring, hn_tx_chimney), 5534 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); 5535 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", 5536 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5537 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), 5538 
hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); 5539 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", 5540 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, 5541 "# of total TX descs"); 5542 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", 5543 CTLFLAG_RD, &sc->hn_chim_szmax, 0, 5544 "Chimney send packet size upper boundary"); 5545 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", 5546 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5547 hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 5548 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", 5549 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5550 __offsetof(struct hn_tx_ring, hn_direct_tx_size), 5551 hn_tx_conf_int_sysctl, "I", 5552 "Size of the packet for direct transmission"); 5553 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", 5554 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5555 __offsetof(struct hn_tx_ring, hn_sched_tx), 5556 hn_tx_conf_int_sysctl, "I", 5557 "Always schedule transmission " 5558 "instead of doing direct transmission"); 5559 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", 5560 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); 5561 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", 5562 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); 5563 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax", 5564 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0, 5565 "Applied packet transmission aggregation size"); 5566 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax", 5567 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5568 hn_txagg_pktmax_sysctl, "I", 5569 "Applied packet transmission aggregation packets"); 5570 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align", 5571 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5572 hn_txagg_align_sysctl, "I", 5573 "Applied packet transmission aggregation alignment"); 5574 5575 return 0; 5576} 5577 5578static void 5579hn_set_chim_size(struct hn_softc *sc, int chim_size) 5580{ 5581 int i; 5582 5583 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5584 sc->hn_tx_ring[i].hn_chim_size = chim_size; 5585} 5586 5587static void 5588hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) 5589{ 5590 struct ifnet *ifp = sc->hn_ifp; 5591 u_int hw_tsomax; 5592 int tso_minlen; 5593 5594 HN_LOCK_ASSERT(sc); 5595 5596 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) 5597 return; 5598 5599 KASSERT(sc->hn_ndis_tso_sgmin >= 2, 5600 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); 5601 tso_minlen = sc->hn_ndis_tso_sgmin * mtu; 5602 5603 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && 5604 sc->hn_ndis_tso_szmax <= IP_MAXPACKET, 5605 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); 5606 5607 if (tso_maxlen < tso_minlen) 5608 tso_maxlen = tso_minlen; 5609 else if (tso_maxlen > IP_MAXPACKET) 5610 tso_maxlen = IP_MAXPACKET; 5611 if (tso_maxlen > sc->hn_ndis_tso_szmax) 5612 tso_maxlen = sc->hn_ndis_tso_szmax; 5613 hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 5614 5615 if (hn_xpnt_vf_isready(sc)) { 5616 if (hw_tsomax > sc->hn_vf_ifp->if_hw_tsomax) 5617 hw_tsomax = sc->hn_vf_ifp->if_hw_tsomax; 5618 } 5619 ifp->if_hw_tsomax = hw_tsomax; 5620 if (bootverbose) 5621 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); 5622} 5623 5624static void 5625hn_fixup_tx_data(struct hn_softc *sc) 5626{ 5627 uint64_t csum_assist; 5628 int i; 5629 5630 hn_set_chim_size(sc, sc->hn_chim_szmax); 5631 if (hn_tx_chimney_size > 0 && 5632 hn_tx_chimney_size < sc->hn_chim_szmax) 5633 hn_set_chim_size(sc, hn_tx_chimney_size); 5634 
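	/*
	 * Translate the negotiated device capabilities (HN_CAP_*) into
	 * if_hwassist bits; UDP checksum offloads are additionally gated
	 * by the hn_enable_udp4cs/hn_enable_udp6cs tunables.
	 */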
5635 csum_assist = 0; 5636 if (sc->hn_caps & HN_CAP_IPCS) 5637 csum_assist |= CSUM_IP; 5638 if (sc->hn_caps & HN_CAP_TCP4CS) 5639 csum_assist |= CSUM_IP_TCP; 5640 if ((sc->hn_caps & HN_CAP_UDP4CS) && hn_enable_udp4cs) 5641 csum_assist |= CSUM_IP_UDP; 5642 if (sc->hn_caps & HN_CAP_TCP6CS) 5643 csum_assist |= CSUM_IP6_TCP; 5644 if ((sc->hn_caps & HN_CAP_UDP6CS) && hn_enable_udp6cs) 5645 csum_assist |= CSUM_IP6_UDP; 5646 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5647 sc->hn_tx_ring[i].hn_csum_assist = csum_assist; 5648 5649 if (sc->hn_caps & HN_CAP_HASHVAL) { 5650 /* 5651 * Support HASHVAL pktinfo on TX path. 5652 */ 5653 if (bootverbose) 5654 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); 5655 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5656 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; 5657 } 5658} 5659 5660static void 5661hn_fixup_rx_data(struct hn_softc *sc) 5662{ 5663 5664 if (sc->hn_caps & HN_CAP_UDPHASH) { 5665 int i; 5666 5667 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 5668 sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_UDP_HASH; 5669 } 5670} 5671 5672static void 5673hn_destroy_tx_data(struct hn_softc *sc) 5674{ 5675 int i; 5676 5677 if (sc->hn_chim != NULL) { 5678 if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) { 5679 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); 5680 } else { 5681 device_printf(sc->hn_dev, 5682 "chimney sending buffer is referenced"); 5683 } 5684 sc->hn_chim = NULL; 5685 } 5686 5687 if (sc->hn_tx_ring_cnt == 0) 5688 return; 5689 5690 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5691 hn_tx_ring_destroy(&sc->hn_tx_ring[i]); 5692 5693 free(sc->hn_tx_ring, M_DEVBUF); 5694 sc->hn_tx_ring = NULL; 5695 5696 sc->hn_tx_ring_cnt = 0; 5697 sc->hn_tx_ring_inuse = 0; 5698} 5699 5700#ifdef HN_IFSTART_SUPPORT 5701 5702static void 5703hn_start_taskfunc(void *xtxr, int pending __unused) 5704{ 5705 struct hn_tx_ring *txr = xtxr; 5706 5707 mtx_lock(&txr->hn_tx_lock); 5708 hn_start_locked(txr, 0); 5709 mtx_unlock(&txr->hn_tx_lock); 5710} 5711 5712static int 5713hn_start_locked(struct hn_tx_ring *txr, int len) 5714{ 5715 struct hn_softc *sc = txr->hn_sc; 5716 struct ifnet *ifp = sc->hn_ifp; 5717 int sched = 0; 5718 5719 KASSERT(hn_use_if_start, 5720 ("hn_start_locked is called, when if_start is disabled")); 5721 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5722 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 5723 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 5724 5725 if (__predict_false(txr->hn_suspended)) 5726 return (0); 5727 5728 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 5729 IFF_DRV_RUNNING) 5730 return (0); 5731 5732 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 5733 struct hn_txdesc *txd; 5734 struct mbuf *m_head; 5735 int error; 5736 5737 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 5738 if (m_head == NULL) 5739 break; 5740 5741 if (len > 0 && m_head->m_pkthdr.len > len) { 5742 /* 5743 * This sending could be time consuming; let callers 5744 * dispatch this packet sending (and sending of any 5745 * following up packets) to tx taskqueue. 
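			 * Returning sched=1 makes hn_start() fall through
			 * to taskqueue_enqueue() instead of transmitting
			 * in this context.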
			 */
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			sched = 1;
			break;
		}

#if defined(INET6) || defined(INET)
		if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
			m_head = hn_tso_fixup(m_head);
			if (__predict_false(m_head == NULL)) {
				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
				continue;
			}
		} else if (m_head->m_pkthdr.csum_flags &
		    (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) {
			m_head = hn_set_hlen(m_head);
			if (__predict_false(m_head == NULL)) {
				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
				continue;
			}
		}
#endif

		txd = hn_txdesc_get(txr);
		if (txd == NULL) {
			txr->hn_no_txdescs++;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
			break;
		}

		error = hn_encap(ifp, txr, txd, &m_head);
		if (error) {
			/* Both txd and m_head are freed */
			KASSERT(txr->hn_agg_txd == NULL,
			    ("encap failed w/ pending aggregating txdesc"));
			continue;
		}

		if (txr->hn_agg_pktleft == 0) {
			if (txr->hn_agg_txd != NULL) {
				KASSERT(m_head == NULL,
				    ("pending mbuf for aggregating txdesc"));
				error = hn_flush_txagg(ifp, txr);
				if (__predict_false(error)) {
					atomic_set_int(&ifp->if_drv_flags,
					    IFF_DRV_OACTIVE);
					break;
				}
			} else {
				KASSERT(m_head != NULL, ("mbuf was freed"));
				error = hn_txpkt(ifp, txr, txd);
				if (__predict_false(error)) {
					/* txd is freed, but m_head is not */
					IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
					atomic_set_int(&ifp->if_drv_flags,
					    IFF_DRV_OACTIVE);
					break;
				}
			}
		}
#ifdef INVARIANTS
		else {
			KASSERT(txr->hn_agg_txd != NULL,
			    ("no aggregating txdesc"));
			KASSERT(m_head == NULL,
			    ("pending mbuf for aggregating txdesc"));
		}
#endif
	}

	/* Flush pending aggregated transmission. */
	if (txr->hn_agg_txd != NULL)
		hn_flush_txagg(ifp, txr);
	return (sched);
}

static void
hn_start(struct ifnet *ifp)
{
	struct hn_softc *sc = ifp->if_softc;
	struct hn_tx_ring *txr = &sc->hn_tx_ring[0];

	if (txr->hn_sched_tx)
		goto do_sched;

	if (mtx_trylock(&txr->hn_tx_lock)) {
		int sched;

		sched = hn_start_locked(txr, txr->hn_direct_tx_size);
		mtx_unlock(&txr->hn_tx_lock);
		if (!sched)
			return;
	}
do_sched:
	taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
}

static void
hn_start_txeof_taskfunc(void *xtxr, int pending __unused)
{
	struct hn_tx_ring *txr = xtxr;

	mtx_lock(&txr->hn_tx_lock);
	atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE);
	hn_start_locked(txr, 0);
	mtx_unlock(&txr->hn_tx_lock);
}

static void
hn_start_txeof(struct hn_tx_ring *txr)
{
	struct hn_softc *sc = txr->hn_sc;
	struct ifnet *ifp = sc->hn_ifp;

	KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));

	if (txr->hn_sched_tx)
		goto do_sched;

	if (mtx_trylock(&txr->hn_tx_lock)) {
		int sched;

		atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
		sched = hn_start_locked(txr, txr->hn_direct_tx_size);
		mtx_unlock(&txr->hn_tx_lock);
		if (sched) {
			taskqueue_enqueue(txr->hn_tx_taskq,
			    &txr->hn_tx_task);
		}
	} else {
do_sched:
		/*
		 * Release the OACTIVE earlier, in the hope that
		 * others can catch up.
		 * The task will clear the flag again with the hn_tx_lock
		 * to avoid possible races.
		 */
		atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

#endif	/* HN_IFSTART_SUPPORT */

static int
hn_xmit(struct hn_tx_ring *txr, int len)
{
	struct hn_softc *sc = txr->hn_sc;
	struct ifnet *ifp = sc->hn_ifp;
	struct mbuf *m_head;
	int sched = 0;

	mtx_assert(&txr->hn_tx_lock, MA_OWNED);
#ifdef HN_IFSTART_SUPPORT
	KASSERT(hn_use_if_start == 0,
	    ("hn_xmit is called when if_start is enabled"));
#endif
	KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));

	if (__predict_false(txr->hn_suspended))
		return (0);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
		return (0);

	while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) {
		struct hn_txdesc *txd;
		int error;

		if (len > 0 && m_head->m_pkthdr.len > len) {
			/*
			 * This sending could be time consuming; let callers
			 * dispatch this packet sending (and sending of any
			 * following up packets) to tx taskqueue.
			 */
			drbr_putback(ifp, txr->hn_mbuf_br, m_head);
			sched = 1;
			break;
		}

		txd = hn_txdesc_get(txr);
		if (txd == NULL) {
			txr->hn_no_txdescs++;
			drbr_putback(ifp, txr->hn_mbuf_br, m_head);
			txr->hn_oactive = 1;
			break;
		}

		error = hn_encap(ifp, txr, txd, &m_head);
		if (error) {
			/* Both txd and m_head are freed; discard */
			KASSERT(txr->hn_agg_txd == NULL,
			    ("encap failed w/ pending aggregating txdesc"));
			drbr_advance(ifp, txr->hn_mbuf_br);
			continue;
		}

		if (txr->hn_agg_pktleft == 0) {
			if (txr->hn_agg_txd != NULL) {
				KASSERT(m_head == NULL,
				    ("pending mbuf for aggregating txdesc"));
				error = hn_flush_txagg(ifp, txr);
				if (__predict_false(error)) {
					txr->hn_oactive = 1;
					break;
				}
			} else {
				KASSERT(m_head != NULL, ("mbuf was freed"));
				error = hn_txpkt(ifp, txr, txd);
				if (__predict_false(error)) {
					/* txd is freed, but m_head is not */
					drbr_putback(ifp, txr->hn_mbuf_br,
					    m_head);
					txr->hn_oactive = 1;
					break;
				}
			}
		}
#ifdef INVARIANTS
		else {
			KASSERT(txr->hn_agg_txd != NULL,
			    ("no aggregating txdesc"));
			KASSERT(m_head == NULL,
			    ("pending mbuf for aggregating txdesc"));
		}
#endif

		/* Sent */
		drbr_advance(ifp, txr->hn_mbuf_br);
	}

	/* Flush pending aggregated transmission. */
	if (txr->hn_agg_txd != NULL)
		hn_flush_txagg(ifp, txr);
	return (sched);
}

static int
hn_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct hn_softc *sc = ifp->if_softc;
	struct hn_tx_ring *txr;
	int error, idx = 0;

	if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) {
		struct rm_priotracker pt;

		rm_rlock(&sc->hn_vf_lock, &pt);
		if (__predict_true(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
			struct mbuf *m_bpf = NULL;
			int obytes, omcast;

			obytes = m->m_pkthdr.len;
			omcast = (m->m_flags & M_MCAST) != 0;

			if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) {
				if (bpf_peers_present(ifp->if_bpf)) {
					m_bpf = m_copypacket(m, M_NOWAIT);
					if (m_bpf == NULL) {
						/*
						 * Failed to grab a shallow
						 * copy; tap now.
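						 * Otherwise the tap is
						 * deferred until after
						 * if_transmit(), so BPF
						 * only sees packets the VF
						 * actually accepted.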
6009 */ 6010 ETHER_BPF_MTAP(ifp, m); 6011 } 6012 } 6013 } else { 6014 ETHER_BPF_MTAP(ifp, m); 6015 } 6016 6017 error = sc->hn_vf_ifp->if_transmit(sc->hn_vf_ifp, m); 6018 rm_runlock(&sc->hn_vf_lock, &pt); 6019 6020 if (m_bpf != NULL) { 6021 if (!error) 6022 ETHER_BPF_MTAP(ifp, m_bpf); 6023 m_freem(m_bpf); 6024 } 6025 6026 if (error == ENOBUFS) { 6027 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 6028 } else if (error) { 6029 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 6030 } else { 6031 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 6032 if_inc_counter(ifp, IFCOUNTER_OBYTES, obytes); 6033 if (omcast) { 6034 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 6035 omcast); 6036 } 6037 } 6038 return (error); 6039 } 6040 rm_runlock(&sc->hn_vf_lock, &pt); 6041 } 6042 6043#if defined(INET6) || defined(INET) 6044 /* 6045 * Perform TSO packet header fixup or get l2/l3 header length now, 6046 * since packet headers should be cache-hot. 6047 */ 6048 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 6049 m = hn_tso_fixup(m); 6050 if (__predict_false(m == NULL)) { 6051 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 6052 return EIO; 6053 } 6054 } else if (m->m_pkthdr.csum_flags & 6055 (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) { 6056 m = hn_set_hlen(m); 6057 if (__predict_false(m == NULL)) { 6058 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 6059 return EIO; 6060 } 6061 } 6062#endif 6063 6064 /* 6065 * Select the TX ring based on flowid 6066 */ 6067 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 6068#ifdef RSS 6069 uint32_t bid; 6070 6071 if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), 6072 &bid) == 0) 6073 idx = bid % sc->hn_tx_ring_inuse; 6074 else 6075#endif 6076 { 6077#if defined(INET6) || defined(INET) 6078 int tcpsyn = 0; 6079 6080 if (m->m_pkthdr.len < 128 && 6081 (m->m_pkthdr.csum_flags & 6082 (CSUM_IP_TCP | CSUM_IP6_TCP)) && 6083 (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { 6084 m = hn_check_tcpsyn(m, &tcpsyn); 6085 if (__predict_false(m == NULL)) { 6086 if_inc_counter(ifp, 6087 IFCOUNTER_OERRORS, 1); 6088 return (EIO); 6089 } 6090 } 6091#else 6092 const int tcpsyn = 0; 6093#endif 6094 if (tcpsyn) 6095 idx = 0; 6096 else 6097 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; 6098 } 6099 } 6100 txr = &sc->hn_tx_ring[idx]; 6101 6102 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); 6103 if (error) { 6104 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 6105 return error; 6106 } 6107 6108 if (txr->hn_oactive) 6109 return 0; 6110 6111 if (txr->hn_sched_tx) 6112 goto do_sched; 6113 6114 if (mtx_trylock(&txr->hn_tx_lock)) { 6115 int sched; 6116 6117 sched = hn_xmit(txr, txr->hn_direct_tx_size); 6118 mtx_unlock(&txr->hn_tx_lock); 6119 if (!sched) 6120 return 0; 6121 } 6122do_sched: 6123 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 6124 return 0; 6125} 6126 6127static void 6128hn_tx_ring_qflush(struct hn_tx_ring *txr) 6129{ 6130 struct mbuf *m; 6131 6132 mtx_lock(&txr->hn_tx_lock); 6133 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) 6134 m_freem(m); 6135 mtx_unlock(&txr->hn_tx_lock); 6136} 6137 6138static void 6139hn_xmit_qflush(struct ifnet *ifp) 6140{ 6141 struct hn_softc *sc = ifp->if_softc; 6142 struct rm_priotracker pt; 6143 int i; 6144 6145 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 6146 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 6147 if_qflush(ifp); 6148 6149 rm_rlock(&sc->hn_vf_lock, &pt); 6150 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 6151 sc->hn_vf_ifp->if_qflush(sc->hn_vf_ifp); 6152 rm_runlock(&sc->hn_vf_lock, &pt); 6153} 6154 6155static void 6156hn_xmit_txeof(struct hn_tx_ring *txr) 6157{ 6158 
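	/*
	 * TX completion for the if_transmit path: clear this ring's
	 * oactive marker and kick transmission again, either directly
	 * or via the txeof task.
	 */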
	if (txr->hn_sched_tx)
		goto do_sched;

	if (mtx_trylock(&txr->hn_tx_lock)) {
		int sched;

		txr->hn_oactive = 0;
		sched = hn_xmit(txr, txr->hn_direct_tx_size);
		mtx_unlock(&txr->hn_tx_lock);
		if (sched) {
			taskqueue_enqueue(txr->hn_tx_taskq,
			    &txr->hn_tx_task);
		}
	} else {
do_sched:
		/*
		 * Release the oactive earlier, with the hope that
		 * others could catch up.  The task will clear the
		 * oactive again with the hn_tx_lock to avoid possible
		 * races.
		 */
		txr->hn_oactive = 0;
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

static void
hn_xmit_taskfunc(void *xtxr, int pending __unused)
{
	struct hn_tx_ring *txr = xtxr;

	mtx_lock(&txr->hn_tx_lock);
	hn_xmit(txr, 0);
	mtx_unlock(&txr->hn_tx_lock);
}

static void
hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused)
{
	struct hn_tx_ring *txr = xtxr;

	mtx_lock(&txr->hn_tx_lock);
	txr->hn_oactive = 0;
	hn_xmit(txr, 0);
	mtx_unlock(&txr->hn_tx_lock);
}

static int
hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan)
{
	struct vmbus_chan_br cbr;
	struct hn_rx_ring *rxr;
	struct hn_tx_ring *txr = NULL;
	int idx, error;

	idx = vmbus_chan_subidx(chan);

	/*
	 * Link this channel to the RX/TX ring.
	 */
	KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
	    ("invalid channel index %d, should be >= 0 && < %d",
	     idx, sc->hn_rx_ring_inuse));
	rxr = &sc->hn_rx_ring[idx];
	KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
	    ("RX ring %d already attached", idx));
	rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;
	rxr->hn_chan = chan;

	if (bootverbose) {
		if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n",
		    idx, vmbus_chan_id(chan));
	}

	if (idx < sc->hn_tx_ring_inuse) {
		txr = &sc->hn_tx_ring[idx];
		KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
		    ("TX ring %d already attached", idx));
		txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;

		txr->hn_chan = chan;
		if (bootverbose) {
			if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n",
			    idx, vmbus_chan_id(chan));
		}
	}

	/* Bind this channel to a proper CPU. */
	vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx));

	/*
	 * Open this channel.
	 */
	cbr.cbr = rxr->hn_br;
	cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr;
	cbr.cbr_txsz = HN_TXBR_SIZE;
	cbr.cbr_rxsz = HN_RXBR_SIZE;
	error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr);
	if (error) {
		if (error == EISCONN) {
			if_printf(sc->hn_ifp, "bufring is connected after "
			    "chan%u open failure\n", vmbus_chan_id(chan));
			rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
		} else {
			if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
			    vmbus_chan_id(chan), error);
		}
	}
	return (error);
}

static void
hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
{
	struct hn_rx_ring *rxr;
	int idx, error;

	idx = vmbus_chan_subidx(chan);

	/*
	 * Unlink this channel from the RX/TX ring.
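	 *
	 * NOTE:
	 * The channel's sub-index selects the same RX/TX ring pair that
	 * was bound to it in hn_chan_attach().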
	 */
	KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
	    ("invalid channel index %d, should be >= 0 && < %d",
	     idx, sc->hn_rx_ring_inuse));
	rxr = &sc->hn_rx_ring[idx];
	KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
	    ("RX ring %d is not attached", idx));
	rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;

	if (idx < sc->hn_tx_ring_inuse) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];

		KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
		    ("TX ring %d is not attached", idx));
		txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
	}

	/*
	 * Close this channel.
	 *
	 * NOTE:
	 * Channel closing does _not_ destroy the target channel.
	 */
	error = vmbus_chan_close_direct(chan);
	if (error == EISCONN) {
		if_printf(sc->hn_ifp, "chan%u bufring is connected "
		    "after being closed\n", vmbus_chan_id(chan));
		rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
	} else if (error) {
		if_printf(sc->hn_ifp, "chan%u close failed: %d\n",
		    vmbus_chan_id(chan), error);
	}
}

static int
hn_attach_subchans(struct hn_softc *sc)
{
	struct vmbus_channel **subchans;
	int subchan_cnt = sc->hn_rx_ring_inuse - 1;
	int i, error = 0;

	KASSERT(subchan_cnt > 0, ("no sub-channels"));

	/* Attach the sub-channels. */
	subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
	for (i = 0; i < subchan_cnt; ++i) {
		int error1;

		error1 = hn_chan_attach(sc, subchans[i]);
		if (error1) {
			error = error1;
			/* Move on; all channels will be detached later. */
		}
	}
	vmbus_subchan_rel(subchans, subchan_cnt);

	if (error) {
		if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n",
		    error);
	} else {
		if (bootverbose) {
			if_printf(sc->hn_ifp, "%d sub-channels attached\n",
			    subchan_cnt);
		}
	}
	return (error);
}

static void
hn_detach_allchans(struct hn_softc *sc)
{
	struct vmbus_channel **subchans;
	int subchan_cnt = sc->hn_rx_ring_inuse - 1;
	int i;

	if (subchan_cnt == 0)
		goto back;

	/* Detach the sub-channels. */
	subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
	for (i = 0; i < subchan_cnt; ++i)
		hn_chan_detach(sc, subchans[i]);
	vmbus_subchan_rel(subchans, subchan_cnt);

back:
	/*
	 * Detach the primary channel, _after_ all sub-channels
	 * are detached.
	 */
	hn_chan_detach(sc, sc->hn_prichan);

	/* Wait for sub-channels to be destroyed, if any. */
	vmbus_subchan_drain(sc->hn_prichan);

#ifdef INVARIANTS
	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		KASSERT((sc->hn_rx_ring[i].hn_rx_flags &
		    HN_RX_FLAG_ATTACHED) == 0,
		    ("%dth RX ring is still attached", i));
	}
	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
		KASSERT((sc->hn_tx_ring[i].hn_tx_flags &
		    HN_TX_FLAG_ATTACHED) == 0,
		    ("%dth TX ring is still attached", i));
	}
#endif
}

static int
hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
{
	struct vmbus_channel **subchans;
	int nchan, rxr_cnt, error;

	nchan = *nsubch + 1;
	if (nchan == 1) {
		/*
		 * Multiple RX/TX rings are not requested.
		 */
		*nsubch = 0;
		return (0);
	}

	/*
	 * Query RSS capabilities, e.g. # of RX rings, and # of indirect
	 * table entries.
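	 *
	 * If this query fails, the host most likely does not support
	 * vRSS; fall back to the primary channel alone.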
	 */
	error = hn_rndis_query_rsscaps(sc, &rxr_cnt);
	if (error) {
		/* No RSS; this is benign. */
		*nsubch = 0;
		return (0);
	}
	if (bootverbose) {
		if_printf(sc->hn_ifp, "RX rings offered %d, requested %d\n",
		    rxr_cnt, nchan);
	}

	if (nchan > rxr_cnt)
		nchan = rxr_cnt;
	if (nchan == 1) {
		if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n");
		*nsubch = 0;
		return (0);
	}

	/*
	 * Allocate sub-channels from NVS.
	 */
	*nsubch = nchan - 1;
	error = hn_nvs_alloc_subchans(sc, nsubch);
	if (error || *nsubch == 0) {
		/* Failed to allocate sub-channels. */
		*nsubch = 0;
		return (0);
	}

	/*
	 * Wait for all sub-channels to become ready before moving on.
	 */
	subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch);
	vmbus_subchan_rel(subchans, *nsubch);
	return (0);
}

static bool
hn_synth_attachable(const struct hn_softc *sc)
{
	int i;

	if (sc->hn_flags & HN_FLAG_ERRORS)
		return (false);

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];

		if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF)
			return (false);
	}
	return (true);
}

/*
 * Make sure that the RX filter is zero after the successful
 * RNDIS initialization.
 *
 * NOTE:
 * Under certain conditions on certain versions of Hyper-V,
 * the RNDIS rxfilter is _not_ zero on the hypervisor side
 * after the successful RNDIS initialization, which breaks
 * the assumption of any following code (well, it breaks the
 * RNDIS API contract actually).  Clear the RNDIS rxfilter
 * explicitly, drain packets sneaking through, and drain the
 * interrupt taskqueues scheduled due to the stealth packets.
 */
static void
hn_rndis_init_fixat(struct hn_softc *sc, int nchan)
{

	hn_disable_rx(sc);
	hn_drain_rxtx(sc, nchan);
}

static int
hn_synth_attach(struct hn_softc *sc, int mtu)
{
#define ATTACHED_NVS		0x0002
#define ATTACHED_RNDIS		0x0004

	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
	int error, nsubch, nchan = 1, i, rndis_inited;
	uint32_t old_caps, attached = 0;

	KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0,
	    ("synthetic parts were attached"));

	if (!hn_synth_attachable(sc))
		return (ENXIO);

	/* Save capabilities for later verification. */
	old_caps = sc->hn_caps;
	sc->hn_caps = 0;

	/* Clear RSS state. */
	sc->hn_rss_ind_size = 0;
	sc->hn_rss_hash = 0;
	sc->hn_rss_hcap = 0;

	/*
	 * Attach the primary channel _before_ attaching NVS and RNDIS.
	 */
	error = hn_chan_attach(sc, sc->hn_prichan);
	if (error)
		goto failed;

	/*
	 * Attach NVS.
	 */
	error = hn_nvs_attach(sc, mtu);
	if (error)
		goto failed;
	attached |= ATTACHED_NVS;

	/*
	 * Attach RNDIS _after_ NVS is attached.
	 */
	error = hn_rndis_attach(sc, mtu, &rndis_inited);
	if (rndis_inited)
		attached |= ATTACHED_RNDIS;
	if (error)
		goto failed;

	/*
	 * Make sure capabilities are not changed.
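	 *
	 * NOTE:
	 * This check only matters on re-attach, e.g. after an MTU
	 * change; the capabilities published at initial attach time
	 * must remain valid.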
	 */
	if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) {
		if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n",
		    old_caps, sc->hn_caps);
		error = ENXIO;
		goto failed;
	}

	/*
	 * Allocate sub-channels for multi-TX/RX rings.
	 *
	 * NOTE:
	 * The # of RX rings that can be used is equivalent to the # of
	 * channels to be requested.
	 */
	nsubch = sc->hn_rx_ring_cnt - 1;
	error = hn_synth_alloc_subchans(sc, &nsubch);
	if (error)
		goto failed;
	/* NOTE: _Full_ synthetic parts detach is required from now on. */
	sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;

	/*
	 * Set the # of TX/RX rings that could be used according to
	 * the # of channels that NVS offered.
	 */
	nchan = nsubch + 1;
	hn_set_ring_inuse(sc, nchan);
	if (nchan == 1) {
		/* Only the primary channel can be used; done. */
		goto back;
	}

	/*
	 * Attach the sub-channels.
	 *
	 * NOTE: hn_set_ring_inuse() _must_ have been called.
	 */
	error = hn_attach_subchans(sc);
	if (error)
		goto failed;

	/*
	 * Configure RSS key and indirect table _after_ all sub-channels
	 * are attached.
	 */
	if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
		/*
		 * RSS key is not set yet; set it to the default RSS key.
		 */
		if (bootverbose)
			if_printf(sc->hn_ifp, "setup default RSS key\n");
#ifdef RSS
		rss_getkey(rss->rss_key);
#else
		memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
#endif
		sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
	}

	if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
		/*
		 * RSS indirect table is not set yet; set it up in round-
		 * robin fashion.
		 */
		if (bootverbose) {
			if_printf(sc->hn_ifp, "setup default RSS indirect "
			    "table\n");
		}
		for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
			uint32_t subidx;

#ifdef RSS
			subidx = rss_get_indirection_to_bucket(i);
#else
			subidx = i;
#endif
			rss->rss_ind[i] = subidx % nchan;
		}
		sc->hn_flags |= HN_FLAG_HAS_RSSIND;
	} else {
		/*
		 * # of usable channels may be changed, so we have to
		 * make sure that all entries in the RSS indirect table
		 * are valid.
		 *
		 * NOTE: hn_set_ring_inuse() _must_ have been called.
		 */
		hn_rss_ind_fixup(sc);
	}

	sc->hn_rss_hash = sc->hn_rss_hcap;
	if ((sc->hn_flags & HN_FLAG_RXVF) ||
	    (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
		/* NOTE: Don't reconfigure RSS here; it is done below. */
		hn_vf_rss_fixup(sc, false);
	}
	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
	if (error)
		goto failed;
back:
	/*
	 * Fixup transmission aggregation setup.
	 */
	hn_set_txagg(sc);
	hn_rndis_init_fixat(sc, nchan);
	return (0);

failed:
	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
		hn_rndis_init_fixat(sc, nchan);
		hn_synth_detach(sc);
	} else {
		if (attached & ATTACHED_RNDIS) {
			hn_rndis_init_fixat(sc, nchan);
			hn_rndis_detach(sc);
		}
		if (attached & ATTACHED_NVS)
			hn_nvs_detach(sc);
		hn_chan_detach(sc, sc->hn_prichan);
		/* Restore old capabilities. */
		sc->hn_caps = old_caps;
	}
	return (error);

#undef ATTACHED_RNDIS
#undef ATTACHED_NVS
}

/*
 * NOTE:
 * The interface must have been suspended through hn_suspend(), before
 * this function gets called.
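 *
 * hn_synth_detach() undoes hn_synth_attach(): RNDIS first, then NVS,
 * and finally all of the channels.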
 */
static void
hn_synth_detach(struct hn_softc *sc)
{

	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
	    ("synthetic parts were not attached"));

	/* Detach the RNDIS first. */
	hn_rndis_detach(sc);

	/* Detach NVS. */
	hn_nvs_detach(sc);

	/* Detach all of the channels. */
	hn_detach_allchans(sc);

	if (vmbus_current_version >= VMBUS_VERSION_WIN10 &&
	    sc->hn_rxbuf_gpadl != 0) {
		/*
		 * Host is post-Win2016, disconnect RXBUF from the
		 * primary channel here.
		 */
		int error;

		error = vmbus_chan_gpadl_disconnect(sc->hn_prichan,
		    sc->hn_rxbuf_gpadl);
		if (error) {
			if_printf(sc->hn_ifp,
			    "rxbuf gpadl disconn failed: %d\n", error);
			sc->hn_flags |= HN_FLAG_RXBUF_REF;
		}
		sc->hn_rxbuf_gpadl = 0;
	}

	if (vmbus_current_version >= VMBUS_VERSION_WIN10 &&
	    sc->hn_chim_gpadl != 0) {
		/*
		 * Host is post-Win2016, disconnect chimney sending
		 * buffer from the primary channel here.
		 */
		int error;

		error = vmbus_chan_gpadl_disconnect(sc->hn_prichan,
		    sc->hn_chim_gpadl);
		if (error) {
			if_printf(sc->hn_ifp,
			    "chim gpadl disconn failed: %d\n", error);
			sc->hn_flags |= HN_FLAG_CHIM_REF;
		}
		sc->hn_chim_gpadl = 0;
	}
	sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
}

static void
hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
{
	KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
	    ("invalid ring count %d", ring_cnt));

	if (sc->hn_tx_ring_cnt > ring_cnt)
		sc->hn_tx_ring_inuse = ring_cnt;
	else
		sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
	sc->hn_rx_ring_inuse = ring_cnt;

#ifdef RSS
	if (sc->hn_rx_ring_inuse != rss_getnumbuckets()) {
		if_printf(sc->hn_ifp, "# of RX rings (%d) does not match "
		    "# of RSS buckets (%d)\n", sc->hn_rx_ring_inuse,
		    rss_getnumbuckets());
	}
#endif

	if (bootverbose) {
		if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
		    sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
	}
}

static void
hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan)
{

	/*
	 * NOTE:
	 * The TX bufring will not be drained by the hypervisor,
	 * if the primary channel is revoked.
	 */
	while (!vmbus_chan_rx_empty(chan) ||
	    (!vmbus_chan_is_revoked(sc->hn_prichan) &&
	     !vmbus_chan_tx_empty(chan)))
		pause("waitch", 1);
	vmbus_chan_intr_drain(chan);
}

static void
hn_disable_rx(struct hn_softc *sc)
{

	/*
	 * Disable RX by clearing the RX filter forcefully.
	 */
	sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE;
	hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */

	/*
	 * Give RNDIS enough time to flush all pending data packets.
	 */
	pause("waitrx", (200 * hz) / 1000);
}

/*
 * NOTE:
 * RX/TX _must_ have been suspended/disabled, before this function
 * is called.
 */
static void
hn_drain_rxtx(struct hn_softc *sc, int nchan)
{
	struct vmbus_channel **subch = NULL;
	int nsubch;

	/*
	 * Drain RX/TX bufrings and interrupts.
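	 * Sub-channels are drained first, then the primary channel.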
	 */
	nsubch = nchan - 1;
	if (nsubch > 0)
		subch = vmbus_subchan_get(sc->hn_prichan, nsubch);

	if (subch != NULL) {
		int i;

		for (i = 0; i < nsubch; ++i)
			hn_chan_drain(sc, subch[i]);
	}
	hn_chan_drain(sc, sc->hn_prichan);

	if (subch != NULL)
		vmbus_subchan_rel(subch, nsubch);
}

static void
hn_suspend_data(struct hn_softc *sc)
{
	struct hn_tx_ring *txr;
	int i;

	HN_LOCK_ASSERT(sc);

	/*
	 * Suspend TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 1;
		mtx_unlock(&txr->hn_tx_lock);
		/* No one is able to send more packets now. */

		/*
		 * Wait for all pending sends to finish.
		 *
		 * NOTE:
		 * We will _not_ receive all pending send-dones, if the
		 * primary channel is revoked.
		 */
		while (hn_tx_ring_pending(txr) &&
		    !vmbus_chan_is_revoked(sc->hn_prichan))
			pause("hnwtx", 1 /* 1 tick */);
	}

	/*
	 * Disable RX.
	 */
	hn_disable_rx(sc);

	/*
	 * Drain RX/TX.
	 */
	hn_drain_rxtx(sc, sc->hn_rx_ring_inuse);

	/*
	 * Drain any pending TX tasks.
	 *
	 * NOTE:
	 * The above hn_drain_rxtx() can dispatch TX tasks, so the TX
	 * tasks will have to be drained _after_ the above hn_drain_rxtx().
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

static void
hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
{

	((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
}

static void
hn_suspend_mgmt(struct hn_softc *sc)
{
	struct task task;

	HN_LOCK_ASSERT(sc);

	/*
	 * Make sure that hn_mgmt_taskq0 can no longer be accessed
	 * through hn_mgmt_taskq.
	 */
	TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
	vmbus_chan_run_task(sc->hn_prichan, &task);

	/*
	 * Make sure that all pending management tasks are completed.
	 */
	taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
	taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
	taskqueue_drain_all(sc->hn_mgmt_taskq0);
}

static void
hn_suspend(struct hn_softc *sc)
{

	/* Disable polling. */
	hn_polling(sc, 0);

	/*
	 * If the non-transparent mode VF is activated, the synthetic
	 * device is receiving packets, so the data path of the
	 * synthetic device must be suspended.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_RXVF))
		hn_suspend_data(sc);
	hn_suspend_mgmt(sc);
}

static void
hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
{
	int i;

	KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
	    ("invalid TX ring count %d", tx_ring_cnt));

	for (i = 0; i < tx_ring_cnt; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 0;
		mtx_unlock(&txr->hn_tx_lock);
	}
}

static void
hn_resume_data(struct hn_softc *sc)
{
	int i;

	HN_LOCK_ASSERT(sc);

	/*
	 * Re-enable RX.
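	 *
	 * This reinstalls the RX filter that hn_disable_rx() cleared
	 * at suspend time.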
	 */
	hn_rxfilter_config(sc);

	/*
	 * Make sure to clear suspend status on "all" TX rings,
	 * since hn_tx_ring_inuse can be changed after
	 * hn_suspend_data().
	 */
	hn_resume_tx(sc, sc->hn_tx_ring_cnt);

#ifdef HN_IFSTART_SUPPORT
	if (!hn_use_if_start)
#endif
	{
		/*
		 * Flush unused drbrs, since hn_tx_ring_inuse may be
		 * reduced.
		 */
		for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
			hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
	}

	/*
	 * Kick-start TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		/*
		 * Use the txeof task, so that any pending oactive can be
		 * cleared properly.
		 */
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

static void
hn_resume_mgmt(struct hn_softc *sc)
{

	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;

	/*
	 * Kick off network change detection, if it was pending.
	 * If no network change was pending, start link status
	 * checks, which are more lightweight than network change
	 * detection.
	 */
	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
		hn_change_network(sc);
	else
		hn_update_link_status(sc);
}

static void
hn_resume(struct hn_softc *sc)
{

	/*
	 * If the non-transparent mode VF is activated, the synthetic
	 * device has to receive packets, so the data path of the
	 * synthetic device must be resumed.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_RXVF))
		hn_resume_data(sc);

	/*
	 * Don't resume link status change if VF is attached/activated.
	 * - In the non-transparent VF mode, the synthetic device marks
	 *   link down until the VF is deactivated; i.e. VF is down.
	 * - In transparent VF mode, the VF's media status is used until
	 *   the VF is detached.
	 */
	if ((sc->hn_flags & HN_FLAG_RXVF) == 0 &&
	    !(hn_xpnt_vf && sc->hn_vf_ifp != NULL))
		hn_resume_mgmt(sc);

	/*
	 * Re-enable polling if this interface is running and
	 * the polling is requested.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0)
		hn_polling(sc, sc->hn_pollhz);
}

static void
hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen)
{
	const struct rndis_status_msg *msg;
	int ofs;

	if (dlen < sizeof(*msg)) {
		if_printf(sc->hn_ifp, "invalid RNDIS status\n");
		return;
	}
	msg = data;

	switch (msg->rm_status) {
	case RNDIS_STATUS_MEDIA_CONNECT:
	case RNDIS_STATUS_MEDIA_DISCONNECT:
		hn_update_link_status(sc);
		break;

	case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
	case RNDIS_STATUS_LINK_SPEED_CHANGE:
		/* Not really useful; ignore. */
		break;

	case RNDIS_STATUS_NETWORK_CHANGE:
		ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset);
		if (dlen < ofs + msg->rm_stbuflen ||
		    msg->rm_stbuflen < sizeof(uint32_t)) {
			if_printf(sc->hn_ifp, "network changed\n");
		} else {
			uint32_t change;

			memcpy(&change, ((const uint8_t *)msg) + ofs,
			    sizeof(change));
			if_printf(sc->hn_ifp, "network changed, change %u\n",
			    change);
		}
		hn_change_network(sc);
		break;

	default:
		if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n",
		    msg->rm_status);
		break;
	}
}

static int
hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info)
{
	const struct rndis_pktinfo *pi = info_data;
	uint32_t mask = 0;

	while (info_dlen != 0) {
		const void *data;
		uint32_t dlen;

		if (__predict_false(info_dlen < sizeof(*pi)))
			return (EINVAL);
		if (__predict_false(info_dlen < pi->rm_size))
			return (EINVAL);
		info_dlen -= pi->rm_size;

		if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
			return (EINVAL);
		if (__predict_false(pi->rm_size < pi->rm_pktinfooffset))
			return (EINVAL);
		dlen = pi->rm_size - pi->rm_pktinfooffset;
		data = pi->rm_data;

		if (pi->rm_internal == 1) {
			switch (pi->rm_type) {
			case NDIS_PKTINFO_IT_PKTINFO_ID:
				if (__predict_false(dlen < NDIS_PKTINFOID_SZ))
					return (EINVAL);
				info->pktinfo_id =
				    (const struct packet_info_id *)data;
				mask |= HN_RXINFO_PKTINFO_ID;
				break;

			default:
				goto next;
			}
		} else {
			switch (pi->rm_type) {
			case NDIS_PKTINFO_TYPE_VLAN:
				if (__predict_false(dlen
				    < NDIS_VLAN_INFO_SIZE))
					return (EINVAL);
				info->vlan_info = (const uint32_t *)data;
				mask |= HN_RXINFO_VLAN;
				break;

			case NDIS_PKTINFO_TYPE_CSUM:
				if (__predict_false(dlen
				    < NDIS_RXCSUM_INFO_SIZE))
					return (EINVAL);
				info->csum_info = (const uint32_t *)data;
				mask |= HN_RXINFO_CSUM;
				break;

			case HN_NDIS_PKTINFO_TYPE_HASHVAL:
				if (__predict_false(dlen
				    < HN_NDIS_HASH_VALUE_SIZE))
					return (EINVAL);
				info->hash_value = (const uint32_t *)data;
				mask |= HN_RXINFO_HASHVAL;
				break;

			case HN_NDIS_PKTINFO_TYPE_HASHINF:
				if (__predict_false(dlen
				    < HN_NDIS_HASH_INFO_SIZE))
					return (EINVAL);
				info->hash_info = (const uint32_t *)data;
				mask |= HN_RXINFO_HASHINF;
				break;

			default:
				goto next;
			}
		}

		if (mask == HN_RXINFO_ALL) {
			/* All found; done. */
			break;
		}
next:
		pi = (const struct rndis_pktinfo *)
		    ((const uint8_t *)pi + pi->rm_size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if ((mask & HN_RXINFO_HASHVAL) == 0)
		info->hash_info = NULL;
	return (0);
}

static __inline bool
hn_rndis_check_overlap(int off, int len, int check_off, int check_len)
{

	if (off < check_off) {
		if (__predict_true(off + len <= check_off))
			return (false);
	} else if (off > check_off) {
		if (__predict_true(check_off + check_len <= off))
			return (false);
	}
	return (true);
}

static __inline void
hn_rsc_add_data(struct hn_rx_ring *rxr, const void *data,
    uint32_t len, struct hn_rxinfo *info)
{
	uint32_t cnt = rxr->rsc.cnt;

	if (cnt) {
		rxr->rsc.pktlen += len;
	} else {
		rxr->rsc.vlan_info = info->vlan_info;
		rxr->rsc.csum_info = info->csum_info;
		rxr->rsc.hash_info = info->hash_info;
		rxr->rsc.hash_value = info->hash_value;
		rxr->rsc.pktlen = len;
	}

	rxr->rsc.frag_data[cnt] = data;
	rxr->rsc.frag_len[cnt] = len;
	rxr->rsc.cnt++;
}

static void
hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_packet_msg *pkt;
	struct hn_rxinfo info;
	int data_off, pktinfo_off, data_len, pktinfo_len;
	bool rsc_more = false;

	/*
	 * Check length.
	 */
	if (__predict_false(dlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
		return;
	}
	pkt = data;

	if (__predict_false(dlen < pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
		    "dlen %d, msglen %u\n", dlen, pkt->rm_len);
		return;
	}
	if (__predict_false(pkt->rm_len <
	    pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
		    "msglen %u, data %u, oob %u, pktinfo %u\n",
		    pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
		    pkt->rm_pktinfolen);
		return;
	}
	if (__predict_false(pkt->rm_datalen == 0)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
		return;
	}

	/*
	 * Check offsets.
	 */
#define IS_OFFSET_INVALID(ofs)			\
	((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN ||	\
	 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))

	/* XXX Hyper-V does not meet data offset alignment requirement */
	if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data offset %u\n", pkt->rm_dataoffset);
		return;
	}
	if (__predict_false(pkt->rm_oobdataoffset > 0 &&
	    IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "oob offset %u\n", pkt->rm_oobdataoffset);
		return;
	}
	if (__predict_true(pkt->rm_pktinfooffset > 0) &&
	    __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "pktinfo offset %u\n", pkt->rm_pktinfooffset);
		return;
	}

#undef IS_OFFSET_INVALID

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
	data_len = pkt->rm_datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
	pktinfo_len = pkt->rm_pktinfolen;

	/*
	 * Check OOB coverage.
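	 *
	 * OOB data is not expected on the RX data path; if it shows
	 * up, just validate that it neither overflows the message nor
	 * overlaps the data/pktinfo regions.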
7265 */ 7266 if (__predict_false(pkt->rm_oobdatalen != 0)) { 7267 int oob_off, oob_len; 7268 7269 if_printf(rxr->hn_ifp, "got oobdata\n"); 7270 oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset); 7271 oob_len = pkt->rm_oobdatalen; 7272 7273 if (__predict_false(oob_off + oob_len > pkt->rm_len)) { 7274 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7275 "oob overflow, msglen %u, oob abs %d len %d\n", 7276 pkt->rm_len, oob_off, oob_len); 7277 return; 7278 } 7279 7280 /* 7281 * Check against data. 7282 */ 7283 if (hn_rndis_check_overlap(oob_off, oob_len, 7284 data_off, data_len)) { 7285 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7286 "oob overlaps data, oob abs %d len %d, " 7287 "data abs %d len %d\n", 7288 oob_off, oob_len, data_off, data_len); 7289 return; 7290 } 7291 7292 /* 7293 * Check against pktinfo. 7294 */ 7295 if (pktinfo_len != 0 && 7296 hn_rndis_check_overlap(oob_off, oob_len, 7297 pktinfo_off, pktinfo_len)) { 7298 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7299 "oob overlaps pktinfo, oob abs %d len %d, " 7300 "pktinfo abs %d len %d\n", 7301 oob_off, oob_len, pktinfo_off, pktinfo_len); 7302 return; 7303 } 7304 } 7305 7306 /* 7307 * Check per-packet-info coverage and find useful per-packet-info. 7308 */ 7309 info.vlan_info = NULL; 7310 info.csum_info = NULL; 7311 info.hash_info = NULL; 7312 info.pktinfo_id = NULL; 7313 7314 if (__predict_true(pktinfo_len != 0)) { 7315 bool overlap; 7316 int error; 7317 7318 if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) { 7319 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7320 "pktinfo overflow, msglen %u, " 7321 "pktinfo abs %d len %d\n", 7322 pkt->rm_len, pktinfo_off, pktinfo_len); 7323 return; 7324 } 7325 7326 /* 7327 * Check packet info coverage. 7328 */ 7329 overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len, 7330 data_off, data_len); 7331 if (__predict_false(overlap)) { 7332 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7333 "pktinfo overlap data, pktinfo abs %d len %d, " 7334 "data abs %d len %d\n", 7335 pktinfo_off, pktinfo_len, data_off, data_len); 7336 return; 7337 } 7338 7339 /* 7340 * Find useful per-packet-info. 
7341 */ 7342 error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off, 7343 pktinfo_len, &info); 7344 if (__predict_false(error)) { 7345 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg " 7346 "pktinfo\n"); 7347 return; 7348 } 7349 } 7350 7351 if (__predict_false(data_off + data_len > pkt->rm_len)) { 7352 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7353 "data overflow, msglen %u, data abs %d len %d\n", 7354 pkt->rm_len, data_off, data_len); 7355 return; 7356 } 7357 7358 /* Identify RSC fragments, drop invalid packets */ 7359 if ((info.pktinfo_id != NULL) && 7360 (info.pktinfo_id->flag & HN_NDIS_PKTINFO_SUBALLOC)) { 7361 if (info.pktinfo_id->flag & HN_NDIS_PKTINFO_1ST_FRAG) { 7362 rxr->rsc.cnt = 0; 7363 rxr->hn_rsc_pkts++; 7364 } else if (rxr->rsc.cnt == 0) 7365 goto drop; 7366 7367 rsc_more = true; 7368 7369 if (info.pktinfo_id->flag & HN_NDIS_PKTINFO_LAST_FRAG) 7370 rsc_more = false; 7371 7372 if (rsc_more && rxr->rsc.is_last) 7373 goto drop; 7374 } else { 7375 rxr->rsc.cnt = 0; 7376 } 7377 7378 if (__predict_false(rxr->rsc.cnt >= HN_NVS_RSC_MAX)) 7379 goto drop; 7380 7381 /* Store data in per rx ring structure */ 7382 hn_rsc_add_data(rxr,((const uint8_t *)pkt) + data_off, 7383 data_len, &info); 7384 7385 if (rsc_more) 7386 return; 7387 7388 hn_rxpkt(rxr); 7389 rxr->rsc.cnt = 0; 7390 return; 7391drop: 7392 rxr->hn_rsc_drop++; 7393 return; 7394} 7395 7396static __inline void 7397hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen) 7398{ 7399 const struct rndis_msghdr *hdr; 7400 7401 if (__predict_false(dlen < sizeof(*hdr))) { 7402 if_printf(rxr->hn_ifp, "invalid RNDIS msg\n"); 7403 return; 7404 } 7405 hdr = data; 7406 7407 if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) { 7408 /* Hot data path. */ 7409 hn_rndis_rx_data(rxr, data, dlen); 7410 /* Done! */ 7411 return; 7412 } 7413 7414 if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG) 7415 hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen); 7416 else 7417 hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen); 7418} 7419 7420static void 7421hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt) 7422{ 7423 const struct hn_nvs_hdr *hdr; 7424 7425 if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) { 7426 if_printf(sc->hn_ifp, "invalid nvs notify\n"); 7427 return; 7428 } 7429 hdr = VMBUS_CHANPKT_CONST_DATA(pkt); 7430 7431 if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) { 7432 /* Useless; ignore */ 7433 return; 7434 } 7435 if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type); 7436} 7437 7438static void 7439hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan, 7440 const struct vmbus_chanpkt_hdr *pkt) 7441{ 7442 struct hn_nvs_sendctx *sndc; 7443 7444 sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid; 7445 sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt), 7446 VMBUS_CHANPKT_DATALEN(pkt)); 7447 /* 7448 * NOTE: 7449 * 'sndc' CAN NOT be accessed anymore, since it can be freed by 7450 * its callback. 7451 */ 7452} 7453 7454static void 7455hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 7456 const struct vmbus_chanpkt_hdr *pkthdr) 7457{ 7458 const struct vmbus_chanpkt_rxbuf *pkt; 7459 const struct hn_nvs_hdr *nvs_hdr; 7460 int count, i, hlen; 7461 7462 if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) { 7463 if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n"); 7464 return; 7465 } 7466 nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr); 7467 7468 /* Make sure that this is a RNDIS message. 
	 */
	if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) {
		if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
		    nvs_hdr->nvs_type);
		return;
	}

	hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
	if (__predict_false(hlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
		return;
	}
	pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;

	if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
		    pkt->cp_rxbuf_id);
		return;
	}

	count = pkt->cp_rxbuf_cnt;
	if (__predict_false(hlen <
	    __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count);
		return;
	}

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		int ofs, len;

		ofs = pkt->cp_rxbuf[i].rb_ofs;
		len = pkt->cp_rxbuf[i].rb_len;
		if (__predict_false(ofs + len > HN_RXBUF_SIZE)) {
			if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, "
			    "ofs %d, len %d\n", i, ofs, len);
			continue;
		}

		rxr->rsc.is_last = (i == (count - 1));
		hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len);
	}

	/*
	 * Ack the consumed RXBUF associated w/ this channel packet,
	 * so that this RXBUF can be recycled by the hypervisor.
	 */
	hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid);
}

static void
hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    uint64_t tid)
{
	struct hn_nvs_rndis_ack ack;
	int retries, error;

	ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK;
	ack.nvs_status = HN_NVS_STATUS_OK;

	retries = 0;
again:
	error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
	    VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid);
	if (__predict_false(error == EAGAIN)) {
		/*
		 * NOTE:
		 * This should _not_ happen in the real world, since the
		 * consumption of the TX bufring from the TX path is
		 * controlled.
		 */
		if (rxr->hn_ack_failed == 0)
			if_printf(rxr->hn_ifp, "RXBUF ack retry\n");
		rxr->hn_ack_failed++;
		retries++;
		if (retries < 10) {
			DELAY(100);
			goto again;
		}
		/* RXBUF leaks! */
		if_printf(rxr->hn_ifp, "RXBUF ack failed\n");
	}
}

static void
hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
{
	struct hn_rx_ring *rxr = xrxr;
	struct hn_softc *sc = rxr->hn_ifp->if_softc;

	for (;;) {
		struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf;
		int error, pktlen;

		pktlen = rxr->hn_pktbuf_len;
		error = vmbus_chan_recv_pkt(chan, pkt, &pktlen);
		if (__predict_false(error == ENOBUFS)) {
			void *nbuf;
			int nlen;

			/*
			 * Expand the channel packet buffer.
			 *
			 * XXX
			 * Use M_WAITOK here, since allocation failure
			 * is fatal.
			 */
			nlen = rxr->hn_pktbuf_len * 2;
			while (nlen < pktlen)
				nlen *= 2;
			nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);

			if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
			    rxr->hn_pktbuf_len, nlen);

			free(rxr->hn_pktbuf, M_DEVBUF);
			rxr->hn_pktbuf = nbuf;
			rxr->hn_pktbuf_len = nlen;
			/* Retry! */
			continue;
		} else if (__predict_false(error == EAGAIN)) {
			/* No more channel packets; done! */
			break;
		}
		KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));

		switch (pkt->cph_type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			hn_nvs_handle_comp(sc, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(rxr, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(sc, pkt);
			break;

		default:
			if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
			    pkt->cph_type);
			break;
		}
	}
	hn_chan_rollup(rxr, rxr->hn_txr);
}

static void
hn_sysinit(void *arg __unused)
{
	int i;

	hn_udpcs_fixup = counter_u64_alloc(M_WAITOK);

#ifdef HN_IFSTART_SUPPORT
	/*
	 * Don't use ifnet.if_start if transparent VF mode is requested;
	 * mainly due to the IFF_DRV_OACTIVE flag.
	 */
	if (hn_xpnt_vf && hn_use_if_start) {
		hn_use_if_start = 0;
		printf("hn: transparent VF mode, if_transmit will be used "
		    "instead of if_start\n");
	}
#endif
	if (hn_xpnt_vf_attwait < HN_XPNT_VF_ATTWAIT_MIN) {
		printf("hn: invalid transparent VF attach wait timeout %d, "
		    "reset to %d\n",
		    hn_xpnt_vf_attwait, HN_XPNT_VF_ATTWAIT_MIN);
		hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN;
	}

	/*
	 * Initialize the VF map.
	 */
	rm_init_flags(&hn_vfmap_lock, "hn_vfmap", RM_SLEEPABLE);
	hn_vfmap_size = HN_VFMAP_SIZE_DEF;
	hn_vfmap = malloc(sizeof(struct ifnet *) * hn_vfmap_size, M_DEVBUF,
	    M_WAITOK | M_ZERO);

	/*
	 * Fix the # of TX taskqueues.
	 */
	if (hn_tx_taskq_cnt <= 0)
		hn_tx_taskq_cnt = 1;
	else if (hn_tx_taskq_cnt > mp_ncpus)
		hn_tx_taskq_cnt = mp_ncpus;

	/*
	 * Fix the TX taskqueue mode.
	 */
	switch (hn_tx_taskq_mode) {
	case HN_TX_TASKQ_M_INDEP:
	case HN_TX_TASKQ_M_GLOBAL:
	case HN_TX_TASKQ_M_EVTTQ:
		break;
	default:
		hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
		break;
	}

	if (vm_guest != VM_GUEST_HV)
		return;

	if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL)
		return;

	hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
	    M_DEVBUF, M_WAITOK);
	for (i = 0; i < hn_tx_taskq_cnt; ++i) {
		hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK,
		    taskqueue_thread_enqueue, &hn_tx_taskque[i]);
		taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET,
		    "hn tx%d", i);
	}
}
SYSINIT(hn_sysinit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysinit, NULL);

static void
hn_sysuninit(void *arg __unused)
{

	if (hn_tx_taskque != NULL) {
		int i;

		for (i = 0; i < hn_tx_taskq_cnt; ++i)
			taskqueue_free(hn_tx_taskque[i]);
		free(hn_tx_taskque, M_DEVBUF);
	}

	if (hn_vfmap != NULL)
		free(hn_vfmap, M_DEVBUF);
	rm_destroy(&hn_vfmap_lock);

	counter_u64_free(hn_udpcs_fixup);
}
SYSUNINIT(hn_sysuninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysuninit, NULL);