1/* $NetBSD: if_hvn.c,v 1.27 2024/02/09 22:08:34 andvar Exp $ */ 2/* $OpenBSD: if_hvn.c,v 1.39 2018/03/11 14:31:34 mikeb Exp $ */ 3 4/*- 5 * Copyright (c) 2009-2012,2016 Microsoft Corp. 6 * Copyright (c) 2010-2012 Citrix Inc. 7 * Copyright (c) 2012 NetApp Inc. 8 * Copyright (c) 2016 Mike Belopuhov <mike@esdenera.com> 9 * All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice unmodified, this list of conditions, and the following 16 * disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33/* 34 * The OpenBSD port was done under funding by Esdenera Networks GmbH. 
35 */ 36 37#include <sys/cdefs.h> 38__KERNEL_RCSID(0, "$NetBSD: if_hvn.c,v 1.27 2024/02/09 22:08:34 andvar Exp $"); 39 40#ifdef _KERNEL_OPT 41#include "opt_if_hvn.h" 42#include "opt_inet.h" 43#include "opt_inet6.h" 44#endif 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/kernel.h> 49#include <sys/device.h> 50#include <sys/bitops.h> 51#include <sys/bus.h> 52#include <sys/condvar.h> 53#include <sys/cpu.h> 54#include <sys/evcnt.h> 55#include <sys/intr.h> 56#include <sys/kmem.h> 57#include <sys/kthread.h> 58#include <sys/mutex.h> 59#include <sys/pcq.h> 60#include <sys/sysctl.h> 61#include <sys/workqueue.h> 62 63#include <net/if.h> 64#include <net/if_ether.h> 65#include <net/if_media.h> 66#include <net/if_vlanvar.h> 67#include <net/rss_config.h> 68#include <netinet/in.h> 69#include <netinet/ip.h> 70#include <netinet/ip6.h> 71#include <netinet/udp.h> 72 73#include <net/bpf.h> 74 75#include <dev/ic/ndisreg.h> 76#include <dev/ic/rndisreg.h> 77 78#include <dev/hyperv/vmbusvar.h> 79#include <dev/hyperv/if_hvnreg.h> 80 81#ifndef EVL_PRIO_BITS 82#define EVL_PRIO_BITS 13 83#endif 84#ifndef EVL_CFI_BITS 85#define EVL_CFI_BITS 12 86#endif 87 88#define HVN_CHIM_SIZE (15 * 1024 * 1024) 89 90#define HVN_NVS_MSGSIZE 32 91#define HVN_NVS_BUFSIZE PAGE_SIZE 92 93#define HVN_RING_BUFSIZE (128 * PAGE_SIZE) 94#define HVN_RING_IDX2CPU(sc, idx) ((idx) % ncpu) 95 96#ifndef HVN_CHANNEL_MAX_COUNT_DEFAULT 97#define HVN_CHANNEL_MAX_COUNT_DEFAULT 8 98#endif 99 100#ifndef HVN_LINK_STATE_CHANGE_DELAY 101#define HVN_LINK_STATE_CHANGE_DELAY 5000 102#endif 103 104#define HVN_WORKQUEUE_PRI PRI_SOFTNET 105 106/* 107 * RNDIS control interface 108 */ 109#define HVN_RNDIS_CTLREQS 4 110#define HVN_RNDIS_BUFSIZE 512 111 112struct rndis_cmd { 113 uint32_t rc_id; 114 struct hvn_nvs_rndis rc_msg; 115 void *rc_req; 116 bus_dmamap_t rc_dmap; 117 bus_dma_segment_t rc_segs; 118 int rc_nsegs; 119 uint64_t rc_gpa; 120 struct rndis_packet_msg rc_cmp; 121 uint32_t rc_cmplen; 122 uint8_t 
rc_cmpbuf[HVN_RNDIS_BUFSIZE]; 123 int rc_done; 124 TAILQ_ENTRY(rndis_cmd) rc_entry; 125 kmutex_t rc_lock; 126 kcondvar_t rc_cv; 127}; 128TAILQ_HEAD(rndis_queue, rndis_cmd); 129 130#define HVN_MTU_MIN 68 131#define HVN_MTU_MAX (65535 - ETHER_ADDR_LEN) 132 133#define HVN_RNDIS_XFER_SIZE 2048 134 135#define HVN_NDIS_TXCSUM_CAP_IP4 \ 136 (NDIS_TXCSUM_CAP_IP4 | NDIS_TXCSUM_CAP_IP4OPT) 137#define HVN_NDIS_TXCSUM_CAP_TCP4 \ 138 (NDIS_TXCSUM_CAP_TCP4 | NDIS_TXCSUM_CAP_TCP4OPT) 139#define HVN_NDIS_TXCSUM_CAP_TCP6 \ 140 (NDIS_TXCSUM_CAP_TCP6 | NDIS_TXCSUM_CAP_TCP6OPT | \ 141 NDIS_TXCSUM_CAP_IP6EXT) 142#define HVN_NDIS_TXCSUM_CAP_UDP6 \ 143 (NDIS_TXCSUM_CAP_UDP6 | NDIS_TXCSUM_CAP_IP6EXT) 144#define HVN_NDIS_LSOV2_CAP_IP6 \ 145 (NDIS_LSOV2_CAP_IP6EXT | NDIS_LSOV2_CAP_TCP6OPT) 146 147#define HVN_RNDIS_CMD_NORESP __BIT(0) 148 149#define HVN_NVS_CMD_NORESP __BIT(0) 150 151/* 152 * Tx ring 153 */ 154#define HVN_TX_DESC 512 155#define HVN_TX_FRAGS 15 /* 31 is the max */ 156#define HVN_TX_FRAG_SIZE PAGE_SIZE 157#define HVN_TX_PKT_SIZE 16384 158 159#define HVN_RNDIS_PKT_LEN \ 160 (sizeof(struct rndis_packet_msg) + \ 161 sizeof(struct rndis_pktinfo) + NDIS_VLAN_INFO_SIZE + \ 162 sizeof(struct rndis_pktinfo) + NDIS_TXCSUM_INFO_SIZE) 163 164#define HVN_PKTSIZE_MIN(align) \ 165 roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \ 166 HVN_RNDIS_PKT_LEN, (align)) 167#define HVN_PKTSIZE(m, align) \ 168 roundup2((m)->m_pkthdr.len + HVN_RNDIS_PKT_LEN, (align)) 169 170struct hvn_tx_desc { 171 uint32_t txd_id; 172 struct vmbus_gpa txd_sgl[HVN_TX_FRAGS + 1]; 173 int txd_nsge; 174 struct mbuf *txd_buf; 175 bus_dmamap_t txd_dmap; 176 struct vmbus_gpa txd_gpa; 177 struct rndis_packet_msg *txd_req; 178 TAILQ_ENTRY(hvn_tx_desc) txd_entry; 179 u_int txd_refs; 180 uint32_t txd_flags; 181#define HVN_TXD_FLAG_ONAGG __BIT(0) 182#define HVN_TXD_FLAG_DMAMAP __BIT(1) 183 uint32_t txd_chim_index; 184 int txd_chim_size; 185 STAILQ_ENTRY(hvn_tx_desc) txd_agg_entry; 186 STAILQ_HEAD(, hvn_tx_desc) 
txd_agg_list; 187}; 188 189struct hvn_softc; 190struct hvn_rx_ring; 191 192struct hvn_tx_ring { 193 struct hvn_softc *txr_softc; 194 struct vmbus_channel *txr_chan; 195 struct hvn_rx_ring *txr_rxr; 196 void *txr_si; 197 char txr_name[16]; 198 199 int txr_id; 200 int txr_oactive; 201 int txr_suspended; 202 int txr_csum_assist; 203 uint64_t txr_caps_assist; 204 uint32_t txr_flags; 205#define HVN_TXR_FLAG_UDP_HASH __BIT(0) 206 207 struct evcnt txr_evpkts; 208 struct evcnt txr_evsends; 209 struct evcnt txr_evnodesc; 210 struct evcnt txr_evdmafailed; 211 struct evcnt txr_evdefrag; 212 struct evcnt txr_evpcqdrop; 213 struct evcnt txr_evtransmitdefer; 214 struct evcnt txr_evflushfailed; 215 struct evcnt txr_evchimneytried; 216 struct evcnt txr_evchimney; 217 struct evcnt txr_evvlanfixup; 218 struct evcnt txr_evvlanhwtagging; 219 struct evcnt txr_evvlantap; 220 221 kmutex_t txr_lock; 222 pcq_t *txr_interq; 223 224 uint32_t txr_avail; 225 TAILQ_HEAD(, hvn_tx_desc) txr_list; 226 struct hvn_tx_desc txr_desc[HVN_TX_DESC]; 227 uint8_t *txr_msgs; 228 struct hyperv_dma txr_dma; 229 230 int txr_chim_size; 231 232 /* Applied packet transmission aggregation limits. */ 233 int txr_agg_szmax; 234 short txr_agg_pktmax; 235 short txr_agg_align; 236 237 /* Packet transmission aggregation states. */ 238 struct hvn_tx_desc *txr_agg_txd; 239 int txr_agg_szleft; 240 short txr_agg_pktleft; 241 struct rndis_packet_msg *txr_agg_prevpkt; 242 243 /* Temporary stats for each sends. 
*/ 244 int txr_stat_pkts; 245 int txr_stat_size; 246 int txr_stat_mcasts; 247 248 int (*txr_sendpkt)(struct hvn_tx_ring *, 249 struct hvn_tx_desc *); 250} __aligned(CACHE_LINE_SIZE); 251 252struct hvn_rx_ring { 253 struct hvn_softc *rxr_softc; 254 struct vmbus_channel *rxr_chan; 255 struct hvn_tx_ring *rxr_txr; 256 void *rxr_si; 257 bool rxr_workqueue; 258 char rxr_name[16]; 259 260 struct work rxr_wk; 261 volatile bool rxr_onlist; 262 volatile bool rxr_onproc; 263 kmutex_t rxr_onwork_lock; 264 kcondvar_t rxr_onwork_cv; 265 266 uint32_t rxr_flags; 267#define HVN_RXR_FLAG_UDP_HASH __BIT(0) 268 269 kmutex_t rxr_lock; 270 271 struct evcnt rxr_evpkts; 272 struct evcnt rxr_evcsum_ip; 273 struct evcnt rxr_evcsum_tcp; 274 struct evcnt rxr_evcsum_udp; 275 struct evcnt rxr_evvlanhwtagging; 276 struct evcnt rxr_evintr; 277 struct evcnt rxr_evdefer; 278 struct evcnt rxr_evdeferreq; 279 struct evcnt rxr_evredeferreq; 280 281 /* NVS */ 282 uint8_t *rxr_nvsbuf; 283} __aligned(CACHE_LINE_SIZE); 284 285struct hvn_softc { 286 device_t sc_dev; 287 288 struct vmbus_softc *sc_vmbus; 289 struct vmbus_channel *sc_prichan; 290 bus_dma_tag_t sc_dmat; 291 292 struct ethercom sc_ec; 293 struct ifmedia sc_media; 294 struct if_percpuq *sc_ipq; 295 struct workqueue *sc_wq; 296 bool sc_txrx_workqueue; 297 kmutex_t sc_core_lock; 298 299 kmutex_t sc_link_lock; 300 kcondvar_t sc_link_cv; 301 callout_t sc_link_tmout; 302 lwp_t *sc_link_lwp; 303 uint32_t sc_link_ev; 304#define HVN_LINK_EV_STATE_CHANGE __BIT(0) 305#define HVN_LINK_EV_NETWORK_CHANGE_TMOUT __BIT(1) 306#define HVN_LINK_EV_NETWORK_CHANGE __BIT(2) 307#define HVN_LINK_EV_RESUME_NETWORK __BIT(3) 308#define HVN_LINK_EV_EXIT_THREAD __BIT(4) 309 int sc_link_state; 310 bool sc_link_onproc; 311 bool sc_link_pending; 312 bool sc_link_suspend; 313 314 int sc_tx_process_limit; 315 int sc_rx_process_limit; 316 int sc_tx_intr_process_limit; 317 int sc_rx_intr_process_limit; 318 319 struct sysctllog *sc_sysctllog; 320 321 uint32_t sc_caps; 322#define 
HVN_CAPS_VLAN __BIT(0) 323#define HVN_CAPS_MTU __BIT(1) 324#define HVN_CAPS_IPCS __BIT(2) 325#define HVN_CAPS_TCP4CS __BIT(3) 326#define HVN_CAPS_TCP6CS __BIT(4) 327#define HVN_CAPS_UDP4CS __BIT(5) 328#define HVN_CAPS_UDP6CS __BIT(6) 329#define HVN_CAPS_TSO4 __BIT(7) 330#define HVN_CAPS_TSO6 __BIT(8) 331#define HVN_CAPS_HASHVAL __BIT(9) 332#define HVN_CAPS_UDPHASH __BIT(10) 333 334 uint32_t sc_flags; 335#define HVN_SCF_ATTACHED __BIT(0) 336#define HVN_SCF_RXBUF_CONNECTED __BIT(1) 337#define HVN_SCF_CHIM_CONNECTED __BIT(2) 338#define HVN_SCF_REVOKED __BIT(3) 339#define HVN_SCF_HAS_RSSKEY __BIT(4) 340#define HVN_SCF_HAS_RSSIND __BIT(5) 341 342 /* NVS protocol */ 343 int sc_proto; 344 uint32_t sc_nvstid; 345 uint8_t sc_nvsrsp[HVN_NVS_MSGSIZE]; 346 int sc_nvsdone; 347 kmutex_t sc_nvsrsp_lock; 348 kcondvar_t sc_nvsrsp_cv; 349 350 /* RNDIS protocol */ 351 int sc_ndisver; 352 uint32_t sc_rndisrid; 353 int sc_tso_szmax; 354 int sc_tso_sgmin; 355 uint32_t sc_rndis_agg_size; 356 uint32_t sc_rndis_agg_pkts; 357 uint32_t sc_rndis_agg_align; 358 struct rndis_queue sc_cntl_sq; /* submission queue */ 359 kmutex_t sc_cntl_sqlck; 360 struct rndis_queue sc_cntl_cq; /* completion queue */ 361 kmutex_t sc_cntl_cqlck; 362 struct rndis_queue sc_cntl_fq; /* free queue */ 363 kmutex_t sc_cntl_fqlck; 364 kcondvar_t sc_cntl_fqcv; 365 struct rndis_cmd sc_cntl_msgs[HVN_RNDIS_CTLREQS]; 366 struct hvn_nvs_rndis sc_data_msg; 367 368 int sc_rss_ind_size; 369 uint32_t sc_rss_hash; /* setting, NDIS_HASH_ */ 370 uint32_t sc_rss_hcap; /* caps, NDIS_HASH_ */ 371 struct ndis_rssprm_toeplitz sc_rss; 372 373 /* Rx ring */ 374 uint8_t *sc_rx_ring; 375 int sc_rx_size; 376 uint32_t sc_rx_hndl; 377 struct hyperv_dma sc_rx_dma; 378 struct hvn_rx_ring *sc_rxr; 379 int sc_nrxr; 380 int sc_nrxr_inuse; 381 382 /* Tx ring */ 383 struct hvn_tx_ring *sc_txr; 384 int sc_ntxr; 385 int sc_ntxr_inuse; 386 387 /* chimney sending buffers */ 388 uint8_t *sc_chim; 389 uint32_t sc_chim_hndl; 390 struct hyperv_dma 
sc_chim_dma; 391 kmutex_t sc_chim_bmap_lock; 392 u_long *sc_chim_bmap; 393 int sc_chim_bmap_cnt; 394 int sc_chim_cnt; 395 int sc_chim_szmax; 396 397 /* Packet transmission aggregation user settings. */ 398 int sc_agg_size; 399 int sc_agg_pkts; 400}; 401 402#define SC2IFP(_sc_) (&(_sc_)->sc_ec.ec_if) 403#define IFP2SC(_ifp_) ((_ifp_)->if_softc) 404 405#ifndef HVN_TX_PROCESS_LIMIT_DEFAULT 406#define HVN_TX_PROCESS_LIMIT_DEFAULT 128 407#endif 408#ifndef HVN_RX_PROCESS_LIMIT_DEFAULT 409#define HVN_RX_PROCESS_LIMIT_DEFAULT 128 410#endif 411#ifndef HVN_TX_INTR_PROCESS_LIMIT_DEFAULT 412#define HVN_TX_INTR_PROCESS_LIMIT_DEFAULT 256 413#endif 414#ifndef HVN_RX_INTR_PROCESS_LIMIT_DEFAULT 415#define HVN_RX_INTR_PROCESS_LIMIT_DEFAULT 256 416#endif 417 418/* 419 * See hvn_set_hlen(). 420 * 421 * This value is for Azure. For Hyper-V, set this above 422 * 65536 to disable UDP datagram checksum fixup. 423 */ 424#ifndef HVN_UDP_CKSUM_FIXUP_MTU_DEFAULT 425#define HVN_UDP_CKSUM_FIXUP_MTU_DEFAULT 1420 426#endif 427static int hvn_udpcs_fixup_mtu = HVN_UDP_CKSUM_FIXUP_MTU_DEFAULT; 428 429/* Limit chimney send size */ 430static int hvn_tx_chimney_size = 0; 431 432/* # of channels to use; each channel has one RX ring and one TX ring */ 433#ifndef HVN_CHANNEL_COUNT_DEFAULT 434#define HVN_CHANNEL_COUNT_DEFAULT 0 435#endif 436static int hvn_channel_cnt = HVN_CHANNEL_COUNT_DEFAULT; 437 438/* # of transmit rings to use */ 439#ifndef HVN_TX_RING_COUNT_DEFAULT 440#define HVN_TX_RING_COUNT_DEFAULT 0 441#endif 442static int hvn_tx_ring_cnt = HVN_TX_RING_COUNT_DEFAULT; 443 444/* Packet transmission aggregation size limit */ 445static int hvn_tx_agg_size = -1; 446 447/* Packet transmission aggregation count limit */ 448static int hvn_tx_agg_pkts = -1; 449 450static int hvn_match(device_t, cfdata_t, void *); 451static void hvn_attach(device_t, device_t, void *); 452static int hvn_detach(device_t, int); 453 454CFATTACH_DECL_NEW(hvn, sizeof(struct hvn_softc), 455 hvn_match, hvn_attach, hvn_detach, 
NULL); 456 457static int hvn_ioctl(struct ifnet *, u_long, void *); 458static int hvn_media_change(struct ifnet *); 459static void hvn_media_status(struct ifnet *, struct ifmediareq *); 460static void hvn_link_task(void *); 461static void hvn_link_event(struct hvn_softc *, uint32_t); 462static void hvn_link_netchg_tmout_cb(void *); 463static int hvn_init(struct ifnet *); 464static int hvn_init_locked(struct ifnet *); 465static void hvn_stop(struct ifnet *, int); 466static void hvn_stop_locked(struct ifnet *); 467static void hvn_start(struct ifnet *); 468static int hvn_transmit(struct ifnet *, struct mbuf *); 469static void hvn_deferred_transmit(void *); 470static int hvn_flush_txagg(struct hvn_tx_ring *); 471static int hvn_encap(struct hvn_tx_ring *, struct hvn_tx_desc *, 472 struct mbuf *, int); 473static int hvn_txpkt(struct hvn_tx_ring *, struct hvn_tx_desc *); 474static void hvn_txeof(struct hvn_tx_ring *, uint64_t); 475static int hvn_rx_ring_create(struct hvn_softc *, int); 476static int hvn_rx_ring_destroy(struct hvn_softc *); 477static void hvn_fixup_rx_data(struct hvn_softc *); 478static int hvn_tx_ring_create(struct hvn_softc *, int); 479static void hvn_tx_ring_destroy(struct hvn_softc *); 480static void hvn_set_chim_size(struct hvn_softc *, int); 481static uint32_t hvn_chim_alloc(struct hvn_softc *); 482static void hvn_chim_free(struct hvn_softc *, uint32_t); 483static void hvn_fixup_tx_data(struct hvn_softc *); 484static struct mbuf * 485 hvn_set_hlen(struct mbuf *, int *); 486static int hvn_txd_peek(struct hvn_tx_ring *); 487static struct hvn_tx_desc * 488 hvn_txd_get(struct hvn_tx_ring *); 489static void hvn_txd_put(struct hvn_tx_ring *, struct hvn_tx_desc *); 490static void hvn_txd_gc(struct hvn_tx_ring *, struct hvn_tx_desc *); 491static void hvn_txd_hold(struct hvn_tx_desc *); 492static void hvn_txd_agg(struct hvn_tx_desc *, struct hvn_tx_desc *); 493static int hvn_tx_ring_pending(struct hvn_tx_ring *); 494static void hvn_tx_ring_qflush(struct 
hvn_softc *, struct hvn_tx_ring *); 495static int hvn_get_rsscaps(struct hvn_softc *, int *); 496static int hvn_set_rss(struct hvn_softc *, uint16_t); 497static void hvn_fixup_rss_ind(struct hvn_softc *); 498static int hvn_get_hwcaps(struct hvn_softc *, struct ndis_offload *); 499static int hvn_set_capabilities(struct hvn_softc *, int); 500static int hvn_get_lladdr(struct hvn_softc *, uint8_t *); 501static void hvn_update_link_status(struct hvn_softc *); 502static int hvn_get_mtu(struct hvn_softc *, uint32_t *); 503static int hvn_channel_attach(struct hvn_softc *, struct vmbus_channel *); 504static void hvn_channel_detach(struct hvn_softc *, struct vmbus_channel *); 505static void hvn_channel_detach_all(struct hvn_softc *); 506static int hvn_subchannel_attach(struct hvn_softc *); 507static int hvn_synth_alloc_subchannels(struct hvn_softc *, int *); 508static int hvn_synth_attachable(const struct hvn_softc *); 509static int hvn_synth_attach(struct hvn_softc *, int); 510static void hvn_synth_detach(struct hvn_softc *); 511static void hvn_set_ring_inuse(struct hvn_softc *, int); 512static void hvn_disable_rx(struct hvn_softc *); 513static void hvn_drain_rxtx(struct hvn_softc *, int ); 514static void hvn_suspend_data(struct hvn_softc *); 515static void hvn_suspend_mgmt(struct hvn_softc *); 516static void hvn_suspend(struct hvn_softc *) __unused; 517static void hvn_resume_tx(struct hvn_softc *, int); 518static void hvn_resume_data(struct hvn_softc *); 519static void hvn_resume_mgmt(struct hvn_softc *); 520static void hvn_resume(struct hvn_softc *) __unused; 521static void hvn_init_sysctls(struct hvn_softc *); 522 523/* NSVP */ 524static int hvn_nvs_init(struct hvn_softc *); 525static void hvn_nvs_destroy(struct hvn_softc *); 526static int hvn_nvs_attach(struct hvn_softc *, int); 527static int hvn_nvs_connect_rxbuf(struct hvn_softc *); 528static int hvn_nvs_disconnect_rxbuf(struct hvn_softc *); 529static int hvn_nvs_connect_chim(struct hvn_softc *); 530static int 
/*
 * Autoconf match: accept only VMBus channels carrying the Hyper-V
 * network (NetVSC) class GUID.
 */
static int
hvn_match(device_t parent, cfdata_t match, void *aux)
{
	struct vmbus_attach_args *aa = aux;

	if (memcmp(aa->aa_type, &hyperv_guid_network, sizeof(*aa->aa_type)))
		return 0;
	return 1;
}

/*
 * Autoconf attach: set up software state (locks, link-state kthread,
 * per-CPU rx/tx workqueue, Tx/Rx rings), attach the synthetic parts
 * (NVS and RNDIS), query the host for the MAC address and MTU, and
 * register the ifnet.  On failure, unwind in reverse order via the
 * goto-cleanup labels at the bottom.
 */
static void
hvn_attach(device_t parent, device_t self, void *aux)
{
	struct hvn_softc *sc = device_private(self);
	struct vmbus_attach_args *aa = aux;
	struct ifnet *ifp = SC2IFP(sc);
	char xnamebuf[32];
	uint8_t enaddr[ETHER_ADDR_LEN];
	uint32_t mtu;
	int tx_ring_cnt, ring_cnt;
	int error;

	sc->sc_dev = self;
	sc->sc_vmbus = (struct vmbus_softc *)device_private(parent);
	sc->sc_prichan = aa->aa_chan;
	sc->sc_dmat = sc->sc_vmbus->sc_dmat;

	aprint_naive("\n");
	aprint_normal(": Hyper-V NetVSC\n");

	/* Tunable defaults; see the *_DEFAULT macros above. */
	sc->sc_txrx_workqueue = true;
	sc->sc_tx_process_limit = HVN_TX_PROCESS_LIMIT_DEFAULT;
	sc->sc_rx_process_limit = HVN_RX_PROCESS_LIMIT_DEFAULT;
	sc->sc_tx_intr_process_limit = HVN_TX_INTR_PROCESS_LIMIT_DEFAULT;
	sc->sc_rx_intr_process_limit = HVN_RX_INTR_PROCESS_LIMIT_DEFAULT;
	sc->sc_agg_size = hvn_tx_agg_size;
	sc->sc_agg_pkts = hvn_tx_agg_pkts;

	mutex_init(&sc->sc_core_lock, MUTEX_DEFAULT, IPL_SOFTNET);
	mutex_init(&sc->sc_link_lock, MUTEX_DEFAULT, IPL_NET);
	cv_init(&sc->sc_link_cv, "hvnknkcv");
	callout_init(&sc->sc_link_tmout, CALLOUT_MPSAFE);
	callout_setfunc(&sc->sc_link_tmout, hvn_link_netchg_tmout_cb, sc);

	/* Dedicated kthread serializing link-state events (hvn_link_task). */
	if (kthread_create(PRI_NONE, KTHREAD_MUSTJOIN | KTHREAD_MPSAFE, NULL,
	    hvn_link_task, sc, &sc->sc_link_lwp, "%slink",
	    device_xname(self))) {
		aprint_error_dev(self, "failed to create link thread\n");
		return;
	}

	snprintf(xnamebuf, sizeof(xnamebuf), "%srxtx", device_xname(self));
	if (workqueue_create(&sc->sc_wq, xnamebuf, hvn_handle_ring_work,
	    sc, HVN_WORKQUEUE_PRI, IPL_NET, WQ_PERCPU | WQ_MPSAFE)) {
		aprint_error_dev(self, "failed to create workqueue\n");
		sc->sc_wq = NULL;
		goto destroy_link_thread;
	}

	/*
	 * One channel == one Rx ring + one Tx ring.  A non-positive
	 * hvn_channel_cnt means auto-size: min(ncpu, default cap).
	 * Never use more channels than CPUs.
	 */
	ring_cnt = hvn_channel_cnt;
	if (ring_cnt <= 0) {
		ring_cnt = ncpu;
		if (ring_cnt > HVN_CHANNEL_MAX_COUNT_DEFAULT)
			ring_cnt = HVN_CHANNEL_MAX_COUNT_DEFAULT;
	} else if (ring_cnt > ncpu)
		ring_cnt = ncpu;

	/* Tx ring count defaults to the channel count and never exceeds it. */
	tx_ring_cnt = hvn_tx_ring_cnt;
	if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
		tx_ring_cnt = ring_cnt;

	if (hvn_tx_ring_create(sc, tx_ring_cnt)) {
		aprint_error_dev(self, "failed to create Tx ring\n");
		goto destroy_wq;
	}

	if (hvn_rx_ring_create(sc, ring_cnt)) {
		aprint_error_dev(self, "failed to create Rx ring\n");
		goto destroy_tx_ring;
	}

	strlcpy(ifp->if_xname, device_xname(sc->sc_dev), IFNAMSIZ);
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_extflags = IFEF_MPSAFE;
	ifp->if_ioctl = hvn_ioctl;
	ifp->if_start = hvn_start;
	ifp->if_transmit = hvn_transmit;
	ifp->if_init = hvn_init;
	ifp->if_stop = hvn_stop;
	ifp->if_baudrate = IF_Gbps(10);

	IFQ_SET_MAXLEN(&ifp->if_snd, uimax(HVN_TX_DESC - 1, IFQ_MAXLEN));
	IFQ_SET_READY(&ifp->if_snd);

	/* Initialize ifmedia structures. */
	sc->sc_ec.ec_ifmedia = &sc->sc_media;
	ifmedia_init_with_lock(&sc->sc_media, IFM_IMASK,
	    hvn_media_change, hvn_media_status, &sc->sc_core_lock);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_T | IFM_FDX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_T, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_initialize(ifp);
	sc->sc_ipq = if_percpuq_create(ifp);
	if_deferred_start_init(ifp, NULL);

	hvn_nvs_init(sc);
	hvn_rndis_init(sc);
	if (hvn_synth_attach(sc, ETHERMTU)) {
		aprint_error_dev(self, "failed to attach synth\n");
		/*
		 * NOTE(review): this path skips hvn_rndis_destroy() and
		 * hvn_nvs_destroy() even though hvn_nvs_init()/
		 * hvn_rndis_init() ran above — confirm whether their
		 * resources (locks/cvs) leak here.
		 */
		goto destroy_if_percpuq;
	}

	aprint_normal_dev(self, "NVS %d.%d NDIS %d.%d\n",
	    sc->sc_proto >> 16, sc->sc_proto & 0xffff,
	    sc->sc_ndisver >> 16 , sc->sc_ndisver & 0xffff);

	if (hvn_get_lladdr(sc, enaddr)) {
		aprint_error_dev(self,
		    "failed to obtain an ethernet address\n");
		goto detach_synth;
	}
	aprint_normal_dev(self, "Ethernet address %s\n", ether_sprintf(enaddr));

	/*
	 * Fixup TX/RX stuff after synthetic parts are attached.
	 */
	hvn_fixup_tx_data(sc);
	hvn_fixup_rx_data(sc);

	/* Advertise only the checksum offloads the host granted us. */
	ifp->if_capabilities |= sc->sc_txr[0].txr_caps_assist &
	    (IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_IPv4_Rx |
	    IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
	    IFCAP_CSUM_TCPv6_Tx | IFCAP_CSUM_TCPv6_Rx |
	    IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx |
	    IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_UDPv6_Rx);
	/* XXX TSOv4, TSOv6 */
	if (sc->sc_caps & HVN_CAPS_VLAN) {
		/* XXX not sure about VLAN_MTU. */
		sc->sc_ec.ec_capabilities |= ETHERCAP_VLAN_HWTAGGING;
		sc->sc_ec.ec_capabilities |= ETHERCAP_VLAN_MTU;
	}
	sc->sc_ec.ec_capabilities |= ETHERCAP_JUMBO_MTU;

	ether_ifattach(ifp, enaddr);

	/* Shrink if_mtu if the host reports less than the standard MTU. */
	error = hvn_get_mtu(sc, &mtu);
	if (error)
		mtu = ETHERMTU;
	if (mtu < ETHERMTU) {
		DPRINTF("%s: fixup mtu %u -> %u\n", device_xname(sc->sc_dev),
		    ETHERMTU, mtu);
		ifp->if_mtu = mtu;
	}

	if_register(ifp);

	/*
	 * Kick off link status check.
	 */
	hvn_link_event(sc, HVN_LINK_EV_STATE_CHANGE);

	hvn_init_sysctls(sc);

	if (pmf_device_register(self, NULL, NULL))
		pmf_class_network_register(self, ifp);
	else
		aprint_error_dev(self, "couldn't establish power handler\n");

	SET(sc->sc_flags, HVN_SCF_ATTACHED);
	return;

	/* Error unwind: labels fall through, undoing attach in reverse. */
detach_synth:
	hvn_synth_detach(sc);
	hvn_rndis_destroy(sc);
	hvn_nvs_destroy(sc);
destroy_if_percpuq:
	if_percpuq_destroy(sc->sc_ipq);
	hvn_rx_ring_destroy(sc);
destroy_tx_ring:
	hvn_tx_ring_destroy(sc);
destroy_wq:
	workqueue_destroy(sc->sc_wq);
	sc->sc_wq = NULL;
destroy_link_thread:
	hvn_link_event(sc, HVN_LINK_EV_EXIT_THREAD);
	kthread_join(sc->sc_link_lwp);
	callout_destroy(&sc->sc_link_tmout);
	cv_destroy(&sc->sc_link_cv);
	mutex_destroy(&sc->sc_link_lock);
	mutex_destroy(&sc->sc_core_lock);
}

/*
 * Autoconf detach: stop traffic, suspend management operations, tear
 * down the ifnet, the link kthread, the synthetic parts, the rings,
 * and all locks/resources created by hvn_attach().  Returns 0.
 */
static int
hvn_detach(device_t self, int flags)
{
	struct hvn_softc *sc = device_private(self);
	struct ifnet *ifp = SC2IFP(sc);

	/* Nothing to do if attach never completed. */
	if (!ISSET(sc->sc_flags, HVN_SCF_ATTACHED))
		return 0;

	/* Remember host-side revocation so teardown can skip dead channel I/O. */
	if (vmbus_channel_is_revoked(sc->sc_prichan))
		SET(sc->sc_flags, HVN_SCF_REVOKED);

	pmf_device_deregister(self);

	mutex_enter(&sc->sc_core_lock);

	if (ifp->if_flags & IFF_RUNNING)
		hvn_stop_locked(ifp);
	/*
	 * NOTE:
	 * hvn_stop() only suspends data, so management
	 * stuff has to be suspended manually here.
	 */
	hvn_suspend_mgmt(sc);

	ether_ifdetach(ifp);
	if_detach(ifp);
	if_percpuq_destroy(sc->sc_ipq);

	/* Wake the link kthread, tell it to exit, and reap it. */
	hvn_link_event(sc, HVN_LINK_EV_EXIT_THREAD);
	kthread_join(sc->sc_link_lwp);
	callout_halt(&sc->sc_link_tmout, NULL);

	hvn_synth_detach(sc);
	hvn_rndis_destroy(sc);
	hvn_nvs_destroy(sc);

	mutex_exit(&sc->sc_core_lock);

	hvn_rx_ring_destroy(sc);
	hvn_tx_ring_destroy(sc);

	workqueue_destroy(sc->sc_wq);
	callout_destroy(&sc->sc_link_tmout);
	cv_destroy(&sc->sc_link_cv);
	mutex_destroy(&sc->sc_link_lock);
	mutex_destroy(&sc->sc_core_lock);

	sysctl_teardown(&sc->sc_sysctllog);

	return 0;
}
/*
 * Interface ioctl handler.
 *
 * SIOCSIFMTU is handled specially: changing the MTU requires a full
 * detach/reattach of the synthetic parts (NVS/RNDIS) with the new MTU,
 * with the interface suspended around the operation.  All other
 * requests are delegated to ifmedia_ioctl()/ether_ioctl(); ENETRESET
 * from those triggers a reinit while RUNNING.
 */
static int
hvn_ioctl(struct ifnet *ifp, u_long command, void * data)
{
	struct hvn_softc *sc = IFP2SC(ifp);
	struct ifreq *ifr = (struct ifreq *)data;
	uint32_t mtu;
	int s, error = 0;

	switch (command) {
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < HVN_MTU_MIN || ifr->ifr_mtu > HVN_MTU_MAX) {
			error = EINVAL;
			break;
		}

		mutex_enter(&sc->sc_core_lock);

		if (!(sc->sc_caps & HVN_CAPS_MTU)) {
			/* Can't change MTU */
			mutex_exit(&sc->sc_core_lock);
			error = EOPNOTSUPP;
			break;
		}

		if (ifp->if_mtu == ifr->ifr_mtu) {
			mutex_exit(&sc->sc_core_lock);
			break;
		}

		/*
		 * Suspend this interface before the synthetic parts
		 * are ripped.
		 */
		hvn_suspend(sc);

		/*
		 * Detach the synthetic parts, i.e. NVS and RNDIS.
		 */
		hvn_synth_detach(sc);

		/*
		 * Reattach the synthetic parts, i.e. NVS and RNDIS,
		 * with the new MTU setting.
		 *
		 * NOTE(review): on failure the synthetic parts stay
		 * detached and the interface is left suspended —
		 * confirm this is the intended degraded state.
		 */
		error = hvn_synth_attach(sc, ifr->ifr_mtu);
		if (error) {
			mutex_exit(&sc->sc_core_lock);
			break;
		}

		error = hvn_get_mtu(sc, &mtu);
		if (error)
			mtu = ifr->ifr_mtu;
		DPRINTF("%s: RNDIS mtu=%d\n", device_xname(sc->sc_dev), mtu);

		/*
		 * Commit the requested MTU, after the synthetic parts
		 * have been successfully attached.
		 */
		if (mtu >= ifr->ifr_mtu) {
			mtu = ifr->ifr_mtu;
		} else {
			/* Host granted less than requested; use its value. */
			DPRINTF("%s: fixup mtu %d -> %u\n",
			    device_xname(sc->sc_dev), ifr->ifr_mtu, mtu);
		}
		ifp->if_mtu = mtu;

		/*
		 * Synthetic parts' reattach may change the chimney
		 * sending size; update it.
		 */
		if (sc->sc_txr[0].txr_chim_size > sc->sc_chim_szmax)
			hvn_set_chim_size(sc, sc->sc_chim_szmax);

		/*
		 * All done!  Resume the interface now.
		 */
		hvn_resume(sc);

		mutex_exit(&sc->sc_core_lock);
		break;
	default:
		s = splnet();
		if (command == SIOCGIFMEDIA || command == SIOCSIFMEDIA)
			error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, command);
		else
			error = ether_ioctl(ifp, command, data);
		splx(s);
		if (error == ENETRESET) {
			mutex_enter(&sc->sc_core_lock);
			if (ifp->if_flags & IFF_RUNNING)
				hvn_init_locked(ifp);
			mutex_exit(&sc->sc_core_lock);
			error = 0;
		}
		break;
	}

	return error;
}

/*
 * ifmedia change callback: the synthetic NIC has no selectable media,
 * so only IFM_ETHER/IFM_AUTO is accepted.
 */
static int
hvn_media_change(struct ifnet *ifp)
{
	struct hvn_softc *sc = IFP2SC(ifp);
	struct ifmedia *ifm = &sc->sc_media;

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return EINVAL;

	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		break;
	default:
		device_printf(sc->sc_dev, "Only auto media type\n");
		return EINVAL;
	}
	return 0;
}

/*
 * ifmedia status callback: report link state tracked by the link
 * kthread; when up, always claim 10GbaseT full-duplex (the speed the
 * driver advertises, see if_baudrate in hvn_attach()).
 */
static void
hvn_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct hvn_softc *sc = IFP2SC(ifp);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (sc->sc_link_state != LINK_STATE_UP) {
		ifmr->ifm_active |= IFM_NONE;
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;
	ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
}
/*
 * Link-state kthread body (created in hvn_attach()).
 *
 * Sleeps on sc_link_cv until hvn_link_event() posts event bits into
 * sc_link_ev, then processes them with sc_link_lock dropped.
 * A NETWORK_CHANGE event fakes a link-down, then (after
 * HVN_LINK_STATE_CHANGE_DELAY ms via the callout) a fresh link status
 * check brings the link back up — mimicking miibus behavior.
 * HVN_LINK_EV_EXIT_THREAD terminates the thread.
 */
static void
hvn_link_task(void *arg)
{
	struct hvn_softc *sc = arg;
	struct ifnet *ifp = SC2IFP(sc);
	uint32_t event;
	int old_link_state;

	mutex_enter(&sc->sc_link_lock);
	sc->sc_link_onproc = false;
	for (;;) {
		if (sc->sc_link_ev == 0) {
			cv_wait(&sc->sc_link_cv, &sc->sc_link_lock);
			continue;
		}

		/* Consume all pending event bits atomically under the lock. */
		sc->sc_link_onproc = true;
		event = sc->sc_link_ev;
		sc->sc_link_ev = 0;
		mutex_exit(&sc->sc_link_lock);

		if (event & HVN_LINK_EV_EXIT_THREAD)
			break;

		/* While suspended, drop events on the floor. */
		if (sc->sc_link_suspend)
			goto next;

		/* On resume, replay whichever check was pending. */
		if (event & HVN_LINK_EV_RESUME_NETWORK) {
			if (sc->sc_link_pending)
				event |= HVN_LINK_EV_NETWORK_CHANGE;
			else
				event |= HVN_LINK_EV_STATE_CHANGE;
		}

		if (event & HVN_LINK_EV_NETWORK_CHANGE) {
			/* Prevent any link status checks from running. */
			sc->sc_link_pending = true;

			/*
			 * Fake up a [link down --> link up] state change;
			 * 5 seconds delay is used, which closely simulates
			 * miibus reaction upon link down event.
			 */
			old_link_state = sc->sc_link_state;
			sc->sc_link_state = LINK_STATE_DOWN;
			if (old_link_state != sc->sc_link_state) {
				if_link_state_change(ifp, LINK_STATE_DOWN);
			}
#if defined(HVN_LINK_STATE_CHANGE_DELAY) && HVN_LINK_STATE_CHANGE_DELAY > 0
			callout_schedule(&sc->sc_link_tmout,
			    mstohz(HVN_LINK_STATE_CHANGE_DELAY));
#else
			hvn_link_event(sc, HVN_LINK_EV_NETWORK_CHANGE_TMOUT);
#endif
		} else if (event & HVN_LINK_EV_NETWORK_CHANGE_TMOUT) {
			/* Re-allow link status checks. */
			sc->sc_link_pending = false;
			hvn_update_link_status(sc);
		} else if (event & HVN_LINK_EV_STATE_CHANGE) {
			if (!sc->sc_link_pending)
				hvn_update_link_status(sc);
		}
 next:
		mutex_enter(&sc->sc_link_lock);
		sc->sc_link_onproc = false;
	}

	mutex_enter(&sc->sc_link_lock);
	sc->sc_link_onproc = false;
	mutex_exit(&sc->sc_link_lock);

	kthread_exit(0);
}

/*
 * Post event bit(s) to the link kthread and wake it.
 */
static void
hvn_link_event(struct hvn_softc *sc, uint32_t ev)
{

	mutex_enter(&sc->sc_link_lock);
	SET(sc->sc_link_ev, ev);
	cv_signal(&sc->sc_link_cv);
	mutex_exit(&sc->sc_link_lock);
}

/*
 * Callout handler: the fake link-down delay has elapsed; let the link
 * kthread re-run the status check.
 */
static void
hvn_link_netchg_tmout_cb(void *arg)
{
	struct hvn_softc *sc = arg;

	hvn_link_event(sc, HVN_LINK_EV_NETWORK_CHANGE_TMOUT);
}

/*
 * if_init entry point: take the core lock and do the real work.
 */
static int
hvn_init(struct ifnet *ifp)
{
	struct hvn_softc *sc = IFP2SC(ifp);
	int error;

	mutex_enter(&sc->sc_core_lock);
	error = hvn_init_locked(ifp);
	mutex_exit(&sc->sc_core_lock);

	return error;
}

/*
 * Bring the interface up: stop any previous run, open the RNDIS data
 * path, resume the Tx rings and mark the interface RUNNING.
 * Called with sc_core_lock held.
 */
static int
hvn_init_locked(struct ifnet *ifp)
{
	struct hvn_softc *sc = IFP2SC(ifp);
	int error;

	KASSERT(mutex_owned(&sc->sc_core_lock));

	hvn_stop_locked(ifp);

	error = hvn_rndis_open(sc);
	if (error)
		return error;

	/* Clear TX 'suspended' bit. */
	hvn_resume_tx(sc, sc->sc_ntxr_inuse);

	/* Everything is ready; unleash! */
	ifp->if_flags |= IFF_RUNNING;

	return 0;
}

/*
 * if_stop entry point: take the core lock and do the real work.
 */
static void
hvn_stop(struct ifnet *ifp, int disable)
{
	struct hvn_softc *sc = IFP2SC(ifp);

	mutex_enter(&sc->sc_core_lock);
	hvn_stop_locked(ifp);
	mutex_exit(&sc->sc_core_lock);
}

/*
 * Bring the interface down: clear RUNNING, suspend the data path and
 * reset each Tx ring's OACTIVE flag.  Called with sc_core_lock held.
 */
static void
hvn_stop_locked(struct ifnet *ifp)
{
	struct hvn_softc *sc = IFP2SC(ifp);
	int i;

	KASSERT(mutex_owned(&sc->sc_core_lock));

	/* Clear RUNNING bit ASAP. */
	ifp->if_flags &= ~IFF_RUNNING;

	/* Suspend data transfers. */
	hvn_suspend_data(sc);

	/* Clear OACTIVE state. */
	for (i = 0; i < sc->sc_ntxr_inuse; i++)
		sc->sc_txr[i].txr_oactive = 0;
}
 */
	ifp->if_flags &= ~IFF_RUNNING;

	/* Suspend data transfers. */
	hvn_suspend_data(sc);

	/* Clear OACTIVE state. */
	for (i = 0; i < sc->sc_ntxr_inuse; i++)
		sc->sc_txr[i].txr_oactive = 0;
}

/*
 * Common send-queue drain for both if_start (is_transmit == false,
 * dequeues from if_snd) and if_transmit (is_transmit == true, dequeues
 * from the per-ring pcq).  Called with txr_lock held.
 */
static void
hvn_transmit_common(struct ifnet *ifp, struct hvn_tx_ring *txr,
    bool is_transmit)
{
	struct hvn_tx_desc *txd;
	struct mbuf *m;
	int l2hlen = ETHER_HDR_LEN;

	KASSERT(mutex_owned(&txr->txr_lock));

	if (!(ifp->if_flags & IFF_RUNNING))
		return;
	if (txr->txr_oactive)
		return;
	if (txr->txr_suspended)
		return;

	for (;;) {
		if (!hvn_txd_peek(txr)) {
			/* transient */
			txr->txr_oactive = 1;
			txr->txr_evnodesc.ev_count++;
			break;
		}

		if (is_transmit)
			m = pcq_get(txr->txr_interq);
		else
			IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;

#if defined(INET) || defined(INET6)
		if (m->m_pkthdr.csum_flags &
		    (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_TCPv6|M_CSUM_UDPv6)) {
			m = hvn_set_hlen(m, &l2hlen);
			if (__predict_false(m == NULL)) {
				if_statinc(ifp, if_oerrors);
				continue;
			}
		}
#endif

		txd = hvn_txd_get(txr);
		if (hvn_encap(txr, txd, m, l2hlen)) {
			/* the chain is too large */
			if_statinc(ifp, if_oerrors);
			hvn_txd_put(txr, txd);
			m_freem(m);
			continue;
		}

		/*
		 * Aggregation full (or not in use): push the packet(s)
		 * to the host now.
		 */
		if (txr->txr_agg_pktleft == 0) {
			if (txr->txr_agg_txd != NULL) {
				hvn_flush_txagg(txr);
			} else {
				if (hvn_txpkt(txr, txd)) {
					/* txd is freed, but m is not. */
					m_freem(m);
					if_statinc(ifp, if_oerrors);
				}
			}
		}
	}

	/* Flush pending aggregated transmission. */
	if (txr->txr_agg_txd != NULL)
		hvn_flush_txagg(txr);
}

/* if_start method: single-queue path, always uses TX ring 0. */
static void
hvn_start(struct ifnet *ifp)
{
	struct hvn_softc *sc = IFP2SC(ifp);
	struct hvn_tx_ring *txr = &sc->sc_txr[0];

	mutex_enter(&txr->txr_lock);
	hvn_transmit_common(ifp, txr, false);
	mutex_exit(&txr->txr_lock);
}

/* Map the current CPU onto a TX ring index. */
static int
hvn_select_txqueue(struct ifnet *ifp, struct mbuf *m __unused)
{
	struct hvn_softc *sc = IFP2SC(ifp);
	u_int cpu;

	cpu = cpu_index(curcpu());

	return cpu % sc->sc_ntxr_inuse;
}

/* if_transmit method: enqueue on a per-CPU ring and kick its softint. */
static int
hvn_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct hvn_softc *sc = IFP2SC(ifp);
	struct hvn_tx_ring *txr;
	int qid;

	qid = hvn_select_txqueue(ifp, m);
	txr = &sc->sc_txr[qid];

	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
		mutex_enter(&txr->txr_lock);
		txr->txr_evpcqdrop.ev_count++;
		mutex_exit(&txr->txr_lock);
		m_freem(m);
		return ENOBUFS;
	}

	kpreempt_disable();
	softint_schedule(txr->txr_si);
	kpreempt_enable();
	return 0;
}

/* Softint handler that drains the pcq filled by hvn_transmit(). */
static void
hvn_deferred_transmit(void *arg)
{
	struct hvn_tx_ring *txr = arg;
	struct hvn_softc *sc = txr->txr_softc;
	struct ifnet *ifp = SC2IFP(sc);

	mutex_enter(&txr->txr_lock);
	txr->txr_evtransmitdefer.ev_count++;
	hvn_transmit_common(ifp, txr, true);
	mutex_exit(&txr->txr_lock);
}

/*
 * Append a per-packet-info record of 'datalen' bytes to an RNDIS packet
 * message and return a pointer to its data area for the caller to fill.
 */
static inline char *
hvn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
    size_t datalen, uint32_t type)
{
	struct rndis_pktinfo *pi;
	size_t pi_size = sizeof(*pi) + datalen;
	char *cp;

	KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <=
	    pktsize);

	cp = (char *)pkt + pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
	pi = (struct rndis_pktinfo *)cp;
	pi->rm_size = pi_size;
	pi->rm_type = type;
	pi->rm_pktinfooffset =
sizeof(*pi); 1248 pkt->rm_pktinfolen += pi_size; 1249 pkt->rm_dataoffset += pi_size; 1250 pkt->rm_len += pi_size; 1251 1252 return (char *)pi->rm_data; 1253} 1254 1255static struct mbuf * 1256hvn_pullup_hdr(struct mbuf *m, int len) 1257{ 1258 struct mbuf *mn; 1259 1260 if (__predict_false(m->m_len < len)) { 1261 mn = m_pullup(m, len); 1262 if (mn == NULL) 1263 return NULL; 1264 m = mn; 1265 } 1266 return m; 1267} 1268 1269/* 1270 * NOTE: If this function failed, the m would be freed. 1271 */ 1272static struct mbuf * 1273hvn_set_hlen(struct mbuf *m, int *l2hlenp) 1274{ 1275 const struct ether_header *eh; 1276 int l2hlen, off; 1277 1278 m = hvn_pullup_hdr(m, sizeof(*eh)); 1279 if (m == NULL) 1280 return NULL; 1281 1282 eh = mtod(m, const struct ether_header *); 1283 if (eh->ether_type == ntohs(ETHERTYPE_VLAN)) 1284 l2hlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 1285 else 1286 l2hlen = ETHER_HDR_LEN; 1287 1288#if defined(INET) 1289 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4 | M_CSUM_UDPv4)) { 1290 const struct ip *ip; 1291 1292 off = l2hlen + sizeof(*ip); 1293 m = hvn_pullup_hdr(m, off); 1294 if (m == NULL) 1295 return NULL; 1296 1297 ip = (struct ip *)((mtod(m, uint8_t *)) + off); 1298 1299 /* 1300 * UDP checksum offload does not work in Azure, if the 1301 * following conditions meet: 1302 * - sizeof(IP hdr + UDP hdr + payload) > 1420. 1303 * - IP_DF is not set in the IP hdr. 1304 * 1305 * Fallback to software checksum for these UDP datagrams. 
1306 */ 1307 if ((m->m_pkthdr.csum_flags & M_CSUM_UDPv4) && 1308 m->m_pkthdr.len > hvn_udpcs_fixup_mtu + l2hlen && 1309 !(ntohs(ip->ip_off) & IP_DF)) { 1310 uint16_t *csump; 1311 1312 off = l2hlen + 1313 M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data); 1314 m = hvn_pullup_hdr(m, off + sizeof(struct udphdr)); 1315 if (m == NULL) 1316 return NULL; 1317 1318 csump = (uint16_t *)(mtod(m, uint8_t *) + off + 1319 M_CSUM_DATA_IPv4_OFFSET(m->m_pkthdr.csum_data)); 1320 *csump = cpu_in_cksum(m, m->m_pkthdr.len - off, off, 0); 1321 m->m_pkthdr.csum_flags &= ~M_CSUM_UDPv4; 1322 } 1323 } 1324#endif /* INET */ 1325#if defined(INET) && defined(INET6) 1326 else 1327#endif /* INET && INET6 */ 1328#if defined(INET6) 1329 { 1330 const struct ip6_hdr *ip6; 1331 1332 off = l2hlen + sizeof(*ip6); 1333 m = hvn_pullup_hdr(m, off); 1334 if (m == NULL) 1335 return NULL; 1336 1337 ip6 = (struct ip6_hdr *)((mtod(m, uint8_t *)) + l2hlen); 1338 if (ip6->ip6_nxt != IPPROTO_TCP && 1339 ip6->ip6_nxt != IPPROTO_UDP) { 1340 m_freem(m); 1341 return NULL; 1342 } 1343 } 1344#endif /* INET6 */ 1345 1346 *l2hlenp = l2hlen; 1347 1348 return m; 1349} 1350 1351static int 1352hvn_flush_txagg(struct hvn_tx_ring *txr) 1353{ 1354 struct hvn_softc *sc = txr->txr_softc; 1355 struct ifnet *ifp = SC2IFP(sc); 1356 struct hvn_tx_desc *txd; 1357 struct mbuf *m; 1358 int error, pkts; 1359 1360 txd = txr->txr_agg_txd; 1361 KASSERTMSG(txd != NULL, "no aggregate txdesc"); 1362 1363 /* 1364 * Since hvn_txpkt() will reset this temporary stat, save 1365 * it now, so that oerrors can be updated properly, if 1366 * hvn_txpkt() ever fails. 1367 */ 1368 pkts = txr->txr_stat_pkts; 1369 1370 /* 1371 * Since txd's mbuf will _not_ be freed upon hvn_txpkt() 1372 * failure, save it for later freeing, if hvn_txpkt() ever 1373 * fails. 1374 */ 1375 m = txd->txd_buf; 1376 error = hvn_txpkt(txr, txd); 1377 if (__predict_false(error)) { 1378 /* txd is freed, but m is not. 
 */
		m_freem(m);
		txr->txr_evflushfailed.ev_count++;
		if_statadd(ifp, if_oerrors, pkts);
	}

	/* Reset all aggregation states. */
	txr->txr_agg_txd = NULL;
	txr->txr_agg_szleft = 0;
	txr->txr_agg_pktleft = 0;
	txr->txr_agg_prevpkt = NULL;

	return error;
}

/*
 * Try to place this packet in the chimney (pre-registered) sending
 * buffer, aggregating it with previously queued packets when possible.
 * Returns a pointer into the chimney buffer where the caller builds
 * the RNDIS packet, or NULL to use the scatter/gather path instead.
 */
static void *
hvn_try_txagg(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd, int pktsz)
{
	struct hvn_softc *sc = txr->txr_softc;
	struct hvn_tx_desc *agg_txd;
	struct rndis_packet_msg *pkt;
	void *chim;
	int olen;

	if (txr->txr_agg_txd != NULL) {
		if (txr->txr_agg_pktleft > 0 && txr->txr_agg_szleft > pktsz) {
			agg_txd = txr->txr_agg_txd;
			pkt = txr->txr_agg_prevpkt;

			/*
			 * Update the previous RNDIS packet's total length,
			 * it can be increased due to the mandatory alignment
			 * padding for this RNDIS packet. And update the
			 * aggregating txdesc's chimney sending buffer size
			 * accordingly.
			 *
			 * XXX
			 * Zero-out the padding, as required by the RNDIS spec.
			 */
			olen = pkt->rm_len;
			pkt->rm_len = roundup2(olen, txr->txr_agg_align);
			agg_txd->txd_chim_size += pkt->rm_len - olen;

			/* Link this txdesc to the parent. */
			hvn_txd_agg(agg_txd, txd);

			chim = (uint8_t *)pkt + pkt->rm_len;
			/* Save the current packet for later fixup. */
			txr->txr_agg_prevpkt = chim;

			txr->txr_agg_pktleft--;
			txr->txr_agg_szleft -= pktsz;
			if (txr->txr_agg_szleft <=
			    HVN_PKTSIZE_MIN(txr->txr_agg_align)) {
				/*
				 * Probably can't aggregate more packets,
				 * flush this aggregating txdesc proactively.
				 */
				txr->txr_agg_pktleft = 0;
			}

			/* Done! */
			return chim;
		}
		/* Current aggregate is full; send it before starting anew. */
		hvn_flush_txagg(txr);
	}

	txr->txr_evchimneytried.ev_count++;
	txd->txd_chim_index = hvn_chim_alloc(sc);
	if (txd->txd_chim_index == HVN_NVS_CHIM_IDX_INVALID)
		return NULL;
	txr->txr_evchimney.ev_count++;

	chim = sc->sc_chim + (txd->txd_chim_index * sc->sc_chim_szmax);

	/* Start a new aggregate only if another minimal packet still fits. */
	if (txr->txr_agg_pktmax > 1 &&
	    txr->txr_agg_szmax > pktsz + HVN_PKTSIZE_MIN(txr->txr_agg_align)) {
		txr->txr_agg_txd = txd;
		txr->txr_agg_pktleft = txr->txr_agg_pktmax - 1;
		txr->txr_agg_szleft = txr->txr_agg_szmax - pktsz;
		txr->txr_agg_prevpkt = chim;
	}

	return chim;
}

/*
 * Build the RNDIS packet message for 'm' and attach it to 'txd',
 * either inside the chimney buffer or via DMA scatter/gather.
 * Returns non-zero if the mbuf cannot be encapsulated (caller frees m).
 */
static int
hvn_encap(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd, struct mbuf *m,
    int l2hlen)
{
	/* Used to pad ethernet frames with < ETHER_MIN_LEN bytes */
	static const char zero_pad[ETHER_MIN_LEN];
	struct hvn_softc *sc = txr->txr_softc;
	struct rndis_packet_msg *pkt;
	bus_dma_segment_t *seg;
	void *chim = NULL;
	size_t pktlen, pktsize;
	int l3hlen;
	int i, rv;

	/*
	 * Convert an in-band 802.1Q header to an out-of-band tag, since
	 * the host does VLAN tagging in hardware.
	 */
	if (ISSET(sc->sc_caps, HVN_CAPS_VLAN) && !vlan_has_tag(m)) {
		struct ether_vlan_header *evl;

		m = hvn_pullup_hdr(m, sizeof(*evl));
		if (m == NULL) {
			DPRINTF("%s: failed to pullup mbuf\n",
			    device_xname(sc->sc_dev));
			return -1;
		}

		evl = mtod(m, struct ether_vlan_header *);
		if (evl->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
			struct ether_header *eh;
			uint16_t proto = evl->evl_proto;

			vlan_set_tag(m, ntohs(evl->evl_tag));

			/*
			 * Trim VLAN tag from header.
			 */
			memmove((uint8_t *)evl + ETHER_VLAN_ENCAP_LEN,
			    evl, ETHER_HDR_LEN);
			m_adj(m, ETHER_VLAN_ENCAP_LEN);

			eh = mtod(m, struct ether_header *);
			eh->ether_type = proto;

			/*
			 * Re-padding. See sys/net/if_vlan.c:vlan_start().
			 */
			if (m->m_pkthdr.len < (ETHER_MIN_LEN - ETHER_CRC_LEN +
			    ETHER_VLAN_ENCAP_LEN)) {
				m_copyback(m, m->m_pkthdr.len,
				    (ETHER_MIN_LEN - ETHER_CRC_LEN +
				    ETHER_VLAN_ENCAP_LEN) -
				    m->m_pkthdr.len, zero_pad);
			}

			txr->txr_evvlanfixup.ev_count++;
		}
	}

	pkt = txd->txd_req;
	pktsize = HVN_PKTSIZE(m, txr->txr_agg_align);
	if (pktsize < txr->txr_chim_size) {
		/* Small enough for the chimney sending buffer. */
		chim = hvn_try_txagg(txr, txd, pktsize);
		if (chim != NULL)
			pkt = chim;
	} else {
		if (txr->txr_agg_txd != NULL)
			hvn_flush_txagg(txr);
	}

	/* Build the RNDIS packet message header. */
	memset(pkt, 0, HVN_RNDIS_PKT_LEN);
	pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
	pkt->rm_len = sizeof(*pkt) + m->m_pkthdr.len;
	pkt->rm_dataoffset = RNDIS_DATA_OFFSET;
	pkt->rm_datalen = m->m_pkthdr.len;
	pkt->rm_pktinfooffset = sizeof(*pkt); /* adjusted below */
	pkt->rm_pktinfolen = 0;

	if (txr->txr_flags & HVN_TXR_FLAG_UDP_HASH) {
		char *cp;

		/*
		 * Set the hash value for this packet, so that the host could
		 * dispatch the TX done event for this packet back to this TX
		 * ring's channel.
		 */
		cp = hvn_rndis_pktinfo_append(pkt, HVN_RNDIS_PKT_LEN,
		    HVN_NDIS_HASH_VALUE_SIZE, HVN_NDIS_PKTINFO_TYPE_HASHVAL);
		memcpy(cp, &txr->txr_id, HVN_NDIS_HASH_VALUE_SIZE);
	}

	if (vlan_has_tag(m)) {
		uint32_t vlan;
		char *cp;
		uint16_t tag;

		tag = vlan_get_tag(m);
		vlan = NDIS_VLAN_INFO_MAKE(EVL_VLANOFTAG(tag),
		    EVL_PRIOFTAG(tag), EVL_CFIOFTAG(tag));
		cp = hvn_rndis_pktinfo_append(pkt, HVN_RNDIS_PKT_LEN,
		    NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
		memcpy(cp, &vlan, NDIS_VLAN_INFO_SIZE);
		txr->txr_evvlanhwtagging.ev_count++;
	}

	if (m->m_pkthdr.csum_flags & txr->txr_csum_assist) {
		uint32_t csum;
		char *cp;

		if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv6 | M_CSUM_UDPv6)) {
			csum = NDIS_TXCSUM_INFO_IPV6;
			l3hlen = M_CSUM_DATA_IPv6_IPHL(m->m_pkthdr.csum_data);
			if (m->m_pkthdr.csum_flags & M_CSUM_TCPv6)
				csum |= NDIS_TXCSUM_INFO_MKTCPCS(l2hlen +
				    l3hlen);
			if (m->m_pkthdr.csum_flags & M_CSUM_UDPv6)
				csum |= NDIS_TXCSUM_INFO_MKUDPCS(l2hlen +
				    l3hlen);
		} else {
			csum = NDIS_TXCSUM_INFO_IPV4;
			l3hlen = M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data);
			if (m->m_pkthdr.csum_flags & M_CSUM_IPv4)
				csum |= NDIS_TXCSUM_INFO_IPCS;
			if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4)
				csum |= NDIS_TXCSUM_INFO_MKTCPCS(l2hlen +
				    l3hlen);
			if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4)
				csum |= NDIS_TXCSUM_INFO_MKUDPCS(l2hlen +
				    l3hlen);
		}
		cp = hvn_rndis_pktinfo_append(pkt, HVN_RNDIS_PKT_LEN,
		    NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM);
		memcpy(cp, &csum, NDIS_TXCSUM_INFO_SIZE);
	}

	pktlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
	pkt->rm_pktinfooffset -= RNDIS_HEADER_OFFSET;

	/*
	 * Fast path: Chimney sending.
	 */
	if (chim != NULL) {
		struct hvn_tx_desc *tgt_txd;

		tgt_txd = (txr->txr_agg_txd != NULL) ?
		    txr->txr_agg_txd : txd;

		KASSERTMSG(pkt == chim,
		    "RNDIS pkt not in chimney sending buffer");
		KASSERTMSG(tgt_txd->txd_chim_index != HVN_NVS_CHIM_IDX_INVALID,
		    "chimney sending buffer is not used");

		tgt_txd->txd_chim_size += pkt->rm_len;
		m_copydata(m, 0, m->m_pkthdr.len, (uint8_t *)chim + pktlen);

		txr->txr_sendpkt = hvn_rndis_output_chim;
		goto done;
	}

	KASSERTMSG(txr->txr_agg_txd == NULL, "aggregating sglist txdesc");
	KASSERTMSG(txd->txd_chim_index == HVN_NVS_CHIM_IDX_INVALID,
	    "chimney buffer is used");
	KASSERTMSG(pkt == txd->txd_req, "RNDIS pkt not in txdesc");

	rv = bus_dmamap_load_mbuf(sc->sc_dmat, txd->txd_dmap, m, BUS_DMA_READ |
	    BUS_DMA_NOWAIT);
	switch (rv) {
	case 0:
		break;
	case EFBIG:
		/* Too many segments: defragment and retry the load once. */
		if (m_defrag(m, M_NOWAIT) != NULL) {
			txr->txr_evdefrag.ev_count++;
			if (bus_dmamap_load_mbuf(sc->sc_dmat, txd->txd_dmap, m,
			    BUS_DMA_READ | BUS_DMA_NOWAIT) == 0)
				break;
		}
		/* FALLTHROUGH */
	default:
		DPRINTF("%s: failed to load mbuf\n", device_xname(sc->sc_dev));
		txr->txr_evdmafailed.ev_count++;
		return -1;
	}
	bus_dmamap_sync(sc->sc_dmat, txd->txd_dmap,
	    0, txd->txd_dmap->dm_mapsize, BUS_DMASYNC_PREWRITE);
	SET(txd->txd_flags, HVN_TXD_FLAG_DMAMAP);

	/* Attach an RNDIS message to the first slot */
	txd->txd_sgl[0].gpa_page = txd->txd_gpa.gpa_page;
	txd->txd_sgl[0].gpa_ofs = txd->txd_gpa.gpa_ofs;
	txd->txd_sgl[0].gpa_len = pktlen;
	txd->txd_nsge = txd->txd_dmap->dm_nsegs + 1;

	for (i = 0; i < txd->txd_dmap->dm_nsegs; i++) {
		seg = &txd->txd_dmap->dm_segs[i];
		txd->txd_sgl[1 + i].gpa_page = atop(seg->ds_addr);
		txd->txd_sgl[1 + i].gpa_ofs = seg->ds_addr & PAGE_MASK;
		txd->txd_sgl[1 + i].gpa_len = seg->ds_len;
	}

	txd->txd_chim_index = HVN_NVS_CHIM_IDX_INVALID;
	txd->txd_chim_size = 0;
	txr->txr_sendpkt = hvn_rndis_output_sgl;
done:
	txd->txd_buf = m;

	/* Update temporary stats for later use. */
	txr->txr_stat_pkts++;
	txr->txr_stat_size += m->m_pkthdr.len;
	if (m->m_flags & M_MCAST)
		txr->txr_stat_mcasts++;

	return 0;
}

/*
 * Tap an outgoing packet to bpf, reconstructing the in-band VLAN
 * header that hvn_encap() converted to an out-of-band tag.
 */
static void
hvn_bpf_mtap(struct hvn_tx_ring *txr, struct mbuf *m, u_int direction)
{
	struct hvn_softc *sc = txr->txr_softc;
	struct ifnet *ifp = SC2IFP(sc);
	struct ether_header *eh;
	struct ether_vlan_header evl;

	if (!vlan_has_tag(m)) {
		bpf_mtap(ifp, m, direction);
		return;
	}

	if (ifp->if_bpf == NULL)
		return;

	txr->txr_evvlantap.ev_count++;

	/*
	 * Restore a VLAN tag for bpf.
	 *
	 * Do not modify contents of the original mbuf,
	 * because Tx processing on the mbuf is still in progress.
	 */

	eh = mtod(m, struct ether_header *);
	/* Copy both dhost and shost in one go. */
	memcpy(evl.evl_dhost, eh->ether_dhost, ETHER_ADDR_LEN * 2);
	evl.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl.evl_tag = htons(vlan_get_tag(m));
	evl.evl_proto = eh->ether_type;

	/* Do not tap ether header of the original mbuf. */
	m_adj(m, sizeof(*eh));

	bpf_mtap2(ifp->if_bpf, &evl, sizeof(evl), m, direction);

	/* Cannot restore ether header of the original mbuf,
	 * but do not worry about it because just free it. */
}

/*
 * Hand a txdesc (with any aggregated txdescs) to the host via
 * txr_sendpkt and update interface statistics.
 */
static int
hvn_txpkt(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
{
	struct hvn_softc *sc = txr->txr_softc;
	struct ifnet *ifp = SC2IFP(sc);
	const struct hvn_tx_desc *tmp_txd;
	int error;

	/*
	 * Make sure that this txd and any aggregated txds are not
	 * freed before bpf_mtap.
1725 */ 1726 hvn_txd_hold(txd); 1727 1728 error = (*txr->txr_sendpkt)(txr, txd); 1729 if (error == 0) { 1730 hvn_bpf_mtap(txr, txd->txd_buf, BPF_D_OUT); 1731 STAILQ_FOREACH(tmp_txd, &txd->txd_agg_list, txd_agg_entry) 1732 hvn_bpf_mtap(txr, tmp_txd->txd_buf, BPF_D_OUT); 1733 1734 if_statadd(ifp, if_opackets, txr->txr_stat_pkts); 1735 if_statadd(ifp, if_obytes, txr->txr_stat_size); 1736 if (txr->txr_stat_mcasts != 0) 1737 if_statadd(ifp, if_omcasts, txr->txr_stat_mcasts); 1738 txr->txr_evpkts.ev_count += txr->txr_stat_pkts; 1739 txr->txr_evsends.ev_count++; 1740 } 1741 1742 hvn_txd_put(txr, txd); 1743 1744 if (__predict_false(error)) { 1745 /* 1746 * Caller will perform further processing on the 1747 * associated mbuf, so don't free it in hvn_txd_put(); 1748 * only unload it from the DMA map in hvn_txd_put(), 1749 * if it was loaded. 1750 */ 1751 txd->txd_buf = NULL; 1752 hvn_txd_put(txr, txd); 1753 } 1754 1755 /* Reset temporary stats, after this sending is done. */ 1756 txr->txr_stat_pkts = 0; 1757 txr->txr_stat_size = 0; 1758 txr->txr_stat_mcasts = 0; 1759 1760 return error; 1761} 1762 1763static void 1764hvn_txeof(struct hvn_tx_ring *txr, uint64_t tid) 1765{ 1766 struct hvn_softc *sc = txr->txr_softc; 1767 struct hvn_tx_desc *txd; 1768 uint32_t id = tid >> 32; 1769 1770 if ((tid & 0xffffffffU) != 0) 1771 return; 1772 1773 id -= HVN_NVS_CHIM_SIG; 1774 if (id >= HVN_TX_DESC) { 1775 device_printf(sc->sc_dev, "tx packet index too large: %u", id); 1776 return; 1777 } 1778 1779 txd = &txr->txr_desc[id]; 1780 1781 if (txd->txd_buf == NULL) 1782 device_printf(sc->sc_dev, "no mbuf @%u\n", id); 1783 1784 hvn_txd_put(txr, txd); 1785} 1786 1787static int 1788hvn_rx_ring_create(struct hvn_softc *sc, int ring_cnt) 1789{ 1790 struct hvn_rx_ring *rxr; 1791 int i; 1792 1793 if (sc->sc_proto <= HVN_NVS_PROTO_VERSION_2) 1794 sc->sc_rx_size = 15 * 1024 * 1024; /* 15MB */ 1795 else 1796 sc->sc_rx_size = 16 * 1024 * 1024; /* 16MB */ 1797 sc->sc_rx_ring = hyperv_dma_alloc(sc->sc_dmat, 
&sc->sc_rx_dma, 1798 sc->sc_rx_size, PAGE_SIZE, PAGE_SIZE, sc->sc_rx_size / PAGE_SIZE); 1799 if (sc->sc_rx_ring == NULL) { 1800 DPRINTF("%s: failed to allocate Rx ring buffer\n", 1801 device_xname(sc->sc_dev)); 1802 return -1; 1803 } 1804 1805 sc->sc_rxr = kmem_zalloc(sizeof(*rxr) * ring_cnt, KM_SLEEP); 1806 sc->sc_nrxr_inuse = sc->sc_nrxr = ring_cnt; 1807 1808 for (i = 0; i < sc->sc_nrxr; i++) { 1809 rxr = &sc->sc_rxr[i]; 1810 rxr->rxr_softc = sc; 1811 if (i < sc->sc_ntxr) { 1812 rxr->rxr_txr = &sc->sc_txr[i]; 1813 rxr->rxr_txr->txr_rxr = rxr; 1814 } 1815 1816 mutex_init(&rxr->rxr_lock, MUTEX_DEFAULT, IPL_NET); 1817 mutex_init(&rxr->rxr_onwork_lock, MUTEX_DEFAULT, IPL_NET); 1818 cv_init(&rxr->rxr_onwork_cv, "waitonwk"); 1819 1820 snprintf(rxr->rxr_name, sizeof(rxr->rxr_name), 1821 "%s-rx%d", device_xname(sc->sc_dev), i); 1822 evcnt_attach_dynamic(&rxr->rxr_evpkts, EVCNT_TYPE_MISC, 1823 NULL, rxr->rxr_name, "packets received"); 1824 evcnt_attach_dynamic(&rxr->rxr_evcsum_ip, EVCNT_TYPE_MISC, 1825 NULL, rxr->rxr_name, "IP checksum"); 1826 evcnt_attach_dynamic(&rxr->rxr_evcsum_tcp, EVCNT_TYPE_MISC, 1827 NULL, rxr->rxr_name, "TCP checksum"); 1828 evcnt_attach_dynamic(&rxr->rxr_evcsum_udp, EVCNT_TYPE_MISC, 1829 NULL, rxr->rxr_name, "UDP checksum"); 1830 evcnt_attach_dynamic(&rxr->rxr_evvlanhwtagging, EVCNT_TYPE_MISC, 1831 NULL, rxr->rxr_name, "VLAN H/W tagging"); 1832 evcnt_attach_dynamic(&rxr->rxr_evintr, EVCNT_TYPE_INTR, 1833 NULL, rxr->rxr_name, "interrupt on ring"); 1834 evcnt_attach_dynamic(&rxr->rxr_evdefer, EVCNT_TYPE_MISC, 1835 NULL, rxr->rxr_name, "handled queue in workqueue"); 1836 evcnt_attach_dynamic(&rxr->rxr_evdeferreq, EVCNT_TYPE_MISC, 1837 NULL, rxr->rxr_name, "requested defer on ring"); 1838 evcnt_attach_dynamic(&rxr->rxr_evredeferreq, EVCNT_TYPE_MISC, 1839 NULL, rxr->rxr_name, "requested defer in workqueue"); 1840 1841 rxr->rxr_nvsbuf = kmem_zalloc(HVN_NVS_BUFSIZE, KM_SLEEP); 1842 if (rxr->rxr_nvsbuf == NULL) { 1843 DPRINTF("%s: failed to allocate 
channel data buffer\n", 1844 device_xname(sc->sc_dev)); 1845 goto errout; 1846 } 1847 1848 rxr->rxr_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE, 1849 hvn_nvs_softintr, rxr); 1850 if (rxr->rxr_si == NULL) { 1851 DPRINTF("%s: failed to establish rx softint\n", 1852 device_xname(sc->sc_dev)); 1853 goto errout; 1854 } 1855 } 1856 1857 return 0; 1858 1859 errout: 1860 hvn_rx_ring_destroy(sc); 1861 return -1; 1862} 1863 1864static int 1865hvn_rx_ring_destroy(struct hvn_softc *sc) 1866{ 1867 struct hvn_rx_ring *rxr; 1868 int i; 1869 1870 if (sc->sc_rxr != NULL) { 1871 for (i = 0; i < sc->sc_nrxr; i++) { 1872 rxr = &sc->sc_rxr[i]; 1873 1874 if (rxr->rxr_si != NULL) { 1875 softint_disestablish(rxr->rxr_si); 1876 rxr->rxr_si = NULL; 1877 } 1878 1879 if (rxr->rxr_nvsbuf != NULL) { 1880 kmem_free(rxr->rxr_nvsbuf, HVN_NVS_BUFSIZE); 1881 rxr->rxr_nvsbuf = NULL; 1882 } 1883 1884 evcnt_detach(&rxr->rxr_evpkts); 1885 evcnt_detach(&rxr->rxr_evcsum_ip); 1886 evcnt_detach(&rxr->rxr_evcsum_tcp); 1887 evcnt_detach(&rxr->rxr_evcsum_udp); 1888 evcnt_detach(&rxr->rxr_evvlanhwtagging); 1889 evcnt_detach(&rxr->rxr_evintr); 1890 evcnt_detach(&rxr->rxr_evdefer); 1891 evcnt_detach(&rxr->rxr_evdeferreq); 1892 evcnt_detach(&rxr->rxr_evredeferreq); 1893 1894 cv_destroy(&rxr->rxr_onwork_cv); 1895 mutex_destroy(&rxr->rxr_onwork_lock); 1896 mutex_destroy(&rxr->rxr_lock); 1897 } 1898 kmem_free(sc->sc_rxr, sizeof(*rxr) * sc->sc_nrxr); 1899 sc->sc_rxr = NULL; 1900 sc->sc_nrxr = 0; 1901 } 1902 if (sc->sc_rx_ring != NULL) { 1903 hyperv_dma_free(sc->sc_dmat, &sc->sc_rx_dma); 1904 sc->sc_rx_ring = NULL; 1905 } 1906 1907 return 0; 1908} 1909 1910static void 1911hvn_fixup_rx_data(struct hvn_softc *sc) 1912{ 1913 struct hvn_rx_ring *rxr; 1914 int i; 1915 1916 if (sc->sc_caps & HVN_CAPS_UDPHASH) { 1917 for (i = 0; i < sc->sc_nrxr; i++) { 1918 rxr = &sc->sc_rxr[i]; 1919 rxr->rxr_flags |= HVN_RXR_FLAG_UDP_HASH; 1920 } 1921 } 1922} 1923 1924static int 1925hvn_tx_ring_create(struct hvn_softc *sc, int 
ring_cnt) 1926{ 1927 struct hvn_tx_ring *txr; 1928 struct hvn_tx_desc *txd; 1929 bus_dma_segment_t *seg; 1930 size_t msgsize; 1931 int i, j; 1932 paddr_t pa; 1933 1934 /* 1935 * Create TXBUF for chimney sending. 1936 * 1937 * NOTE: It is shared by all channels. 1938 */ 1939 sc->sc_chim = hyperv_dma_alloc(sc->sc_dmat, &sc->sc_chim_dma, 1940 HVN_CHIM_SIZE, PAGE_SIZE, 0, 1); 1941 if (sc->sc_chim == NULL) { 1942 DPRINTF("%s: failed to allocate chimney sending memory", 1943 device_xname(sc->sc_dev)); 1944 goto errout; 1945 } 1946 1947 sc->sc_txr = kmem_zalloc(sizeof(*txr) * ring_cnt, KM_SLEEP); 1948 sc->sc_ntxr_inuse = sc->sc_ntxr = ring_cnt; 1949 1950 msgsize = roundup(HVN_RNDIS_PKT_LEN, 128); 1951 1952 for (j = 0; j < ring_cnt; j++) { 1953 txr = &sc->sc_txr[j]; 1954 txr->txr_softc = sc; 1955 txr->txr_id = j; 1956 1957 mutex_init(&txr->txr_lock, MUTEX_DEFAULT, IPL_NET); 1958 txr->txr_interq = pcq_create(HVN_TX_DESC, KM_SLEEP); 1959 1960 snprintf(txr->txr_name, sizeof(txr->txr_name), 1961 "%s-tx%d", device_xname(sc->sc_dev), j); 1962 evcnt_attach_dynamic(&txr->txr_evpkts, EVCNT_TYPE_MISC, 1963 NULL, txr->txr_name, "packets transmit"); 1964 evcnt_attach_dynamic(&txr->txr_evsends, EVCNT_TYPE_MISC, 1965 NULL, txr->txr_name, "sends"); 1966 evcnt_attach_dynamic(&txr->txr_evnodesc, EVCNT_TYPE_MISC, 1967 NULL, txr->txr_name, "descriptor shortage"); 1968 evcnt_attach_dynamic(&txr->txr_evdmafailed, EVCNT_TYPE_MISC, 1969 NULL, txr->txr_name, "DMA failure"); 1970 evcnt_attach_dynamic(&txr->txr_evdefrag, EVCNT_TYPE_MISC, 1971 NULL, txr->txr_name, "mbuf defraged"); 1972 evcnt_attach_dynamic(&txr->txr_evpcqdrop, EVCNT_TYPE_MISC, 1973 NULL, txr->txr_name, "dropped in pcq"); 1974 evcnt_attach_dynamic(&txr->txr_evtransmitdefer, EVCNT_TYPE_MISC, 1975 NULL, txr->txr_name, "deferred transmit"); 1976 evcnt_attach_dynamic(&txr->txr_evflushfailed, EVCNT_TYPE_MISC, 1977 NULL, txr->txr_name, "aggregation flush failure"); 1978 evcnt_attach_dynamic(&txr->txr_evchimneytried, EVCNT_TYPE_MISC, 1979 
NULL, txr->txr_name, "chimney send tried"); 1980 evcnt_attach_dynamic(&txr->txr_evchimney, EVCNT_TYPE_MISC, 1981 NULL, txr->txr_name, "chimney send"); 1982 evcnt_attach_dynamic(&txr->txr_evvlanfixup, EVCNT_TYPE_MISC, 1983 NULL, txr->txr_name, "VLAN fixup"); 1984 evcnt_attach_dynamic(&txr->txr_evvlanhwtagging, EVCNT_TYPE_MISC, 1985 NULL, txr->txr_name, "VLAN H/W tagging"); 1986 evcnt_attach_dynamic(&txr->txr_evvlantap, EVCNT_TYPE_MISC, 1987 NULL, txr->txr_name, "VLAN bpf_mtap fixup"); 1988 1989 txr->txr_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE, 1990 hvn_deferred_transmit, txr); 1991 if (txr->txr_si == NULL) { 1992 aprint_error_dev(sc->sc_dev, 1993 "failed to establish softint for tx ring\n"); 1994 goto errout; 1995 } 1996 1997 /* Allocate memory to store RNDIS messages */ 1998 txr->txr_msgs = hyperv_dma_alloc(sc->sc_dmat, &txr->txr_dma, 1999 msgsize * HVN_TX_DESC, PAGE_SIZE, 0, 1); 2000 if (txr->txr_msgs == NULL) { 2001 DPRINTF("%s: failed to allocate memory for RDNIS " 2002 "messages\n", device_xname(sc->sc_dev)); 2003 goto errout; 2004 } 2005 2006 TAILQ_INIT(&txr->txr_list); 2007 for (i = 0; i < HVN_TX_DESC; i++) { 2008 txd = &txr->txr_desc[i]; 2009 txd->txd_chim_index = HVN_NVS_CHIM_IDX_INVALID; 2010 txd->txd_chim_size = 0; 2011 STAILQ_INIT(&txd->txd_agg_list); 2012 if (bus_dmamap_create(sc->sc_dmat, HVN_TX_PKT_SIZE, 2013 HVN_TX_FRAGS, HVN_TX_FRAG_SIZE, PAGE_SIZE, 2014 BUS_DMA_WAITOK, &txd->txd_dmap)) { 2015 DPRINTF("%s: failed to create map for TX " 2016 "descriptors\n", device_xname(sc->sc_dev)); 2017 goto errout; 2018 } 2019 seg = &txr->txr_dma.map->dm_segs[0]; 2020 pa = seg->ds_addr + (msgsize * i); 2021 txd->txd_gpa.gpa_page = atop(pa); 2022 txd->txd_gpa.gpa_ofs = pa & PAGE_MASK; 2023 txd->txd_gpa.gpa_len = msgsize; 2024 txd->txd_req = (void *)(txr->txr_msgs + (msgsize * i)); 2025 txd->txd_id = i + HVN_NVS_CHIM_SIG; 2026 TAILQ_INSERT_TAIL(&txr->txr_list, txd, txd_entry); 2027 } 2028 txr->txr_avail = HVN_TX_DESC; 2029 } 2030 2031 return 0; 2032 
2033 errout: 2034 hvn_tx_ring_destroy(sc); 2035 return -1; 2036} 2037 2038static void 2039hvn_tx_ring_destroy(struct hvn_softc *sc) 2040{ 2041 struct hvn_tx_ring *txr; 2042 struct hvn_tx_desc *txd; 2043 int i, j; 2044 2045 if (sc->sc_txr != NULL) { 2046 for (j = 0; j < sc->sc_ntxr; j++) { 2047 txr = &sc->sc_txr[j]; 2048 2049 mutex_enter(&txr->txr_lock); 2050 for (i = 0; i < HVN_TX_DESC; i++) { 2051 txd = &txr->txr_desc[i]; 2052 hvn_txd_gc(txr, txd); 2053 } 2054 mutex_exit(&txr->txr_lock); 2055 for (i = 0; i < HVN_TX_DESC; i++) { 2056 txd = &txr->txr_desc[i]; 2057 if (txd->txd_dmap != NULL) { 2058 bus_dmamap_destroy(sc->sc_dmat, 2059 txd->txd_dmap); 2060 txd->txd_dmap = NULL; 2061 } 2062 } 2063 if (txr->txr_msgs != NULL) { 2064 hyperv_dma_free(sc->sc_dmat, &txr->txr_dma); 2065 txr->txr_msgs = NULL; 2066 } 2067 if (txr->txr_si != NULL) { 2068 softint_disestablish(txr->txr_si); 2069 txr->txr_si = NULL; 2070 } 2071 if (txr->txr_interq != NULL) { 2072 hvn_tx_ring_qflush(sc, txr); 2073 pcq_destroy(txr->txr_interq); 2074 txr->txr_interq = NULL; 2075 } 2076 2077 evcnt_detach(&txr->txr_evpkts); 2078 evcnt_detach(&txr->txr_evsends); 2079 evcnt_detach(&txr->txr_evnodesc); 2080 evcnt_detach(&txr->txr_evdmafailed); 2081 evcnt_detach(&txr->txr_evdefrag); 2082 evcnt_detach(&txr->txr_evpcqdrop); 2083 evcnt_detach(&txr->txr_evtransmitdefer); 2084 evcnt_detach(&txr->txr_evflushfailed); 2085 evcnt_detach(&txr->txr_evchimneytried); 2086 evcnt_detach(&txr->txr_evchimney); 2087 evcnt_detach(&txr->txr_evvlanfixup); 2088 evcnt_detach(&txr->txr_evvlanhwtagging); 2089 evcnt_detach(&txr->txr_evvlantap); 2090 2091 mutex_destroy(&txr->txr_lock); 2092 } 2093 2094 kmem_free(sc->sc_txr, sizeof(*txr) * sc->sc_ntxr); 2095 sc->sc_txr = NULL; 2096 } 2097 2098 if (sc->sc_chim != NULL) { 2099 hyperv_dma_free(sc->sc_dmat, &sc->sc_chim_dma); 2100 sc->sc_chim = NULL; 2101 } 2102} 2103 2104static void 2105hvn_set_chim_size(struct hvn_softc *sc, int chim_size) 2106{ 2107 struct hvn_tx_ring *txr; 2108 int i; 

	for (i = 0; i < sc->sc_ntxr_inuse; i++) {
		txr = &sc->sc_txr[i];
		txr->txr_chim_size = chim_size;
	}
}

#if LONG_BIT == 64
#define ffsl(v)	ffs64(v)
#elif LONG_BIT == 32
#define ffsl(v)	ffs32(v)
#else
#error unsupport LONG_BIT
#endif /* LONG_BIT */

/*
 * Allocate one chimney sending buffer slot from the shared bitmap.
 * Returns HVN_NVS_CHIM_IDX_INVALID when all slots are busy.
 */
static uint32_t
hvn_chim_alloc(struct hvn_softc *sc)
{
	uint32_t chim_idx = HVN_NVS_CHIM_IDX_INVALID;
	int i, idx;

	mutex_spin_enter(&sc->sc_chim_bmap_lock);
	for (i = 0; i < sc->sc_chim_bmap_cnt; i++) {
		/* Find the first clear bit in this bitmap word. */
		idx = ffsl(~sc->sc_chim_bmap[i]);
		if (idx == 0)
			continue;

		--idx; /* ffsl is 1-based */
		SET(sc->sc_chim_bmap[i], __BIT(idx));

		chim_idx = i * LONG_BIT + idx;
		break;
	}
	mutex_spin_exit(&sc->sc_chim_bmap_lock);

	return chim_idx;
}

/* Return a chimney buffer slot to the shared bitmap. */
static void
hvn_chim_free(struct hvn_softc *sc, uint32_t chim_idx)
{
	u_long mask;
	uint32_t idx;

	idx = chim_idx / LONG_BIT;
	mask = __BIT(chim_idx % LONG_BIT);

	mutex_spin_enter(&sc->sc_chim_bmap_lock);
	CLR(sc->sc_chim_bmap[idx], mask);
	mutex_spin_exit(&sc->sc_chim_bmap_lock);
}

/*
 * Propagate negotiated host capabilities (chimney size, checksum
 * offload, UDP hash) to all TX rings.
 */
static void
hvn_fixup_tx_data(struct hvn_softc *sc)
{
	struct hvn_tx_ring *txr;
	uint64_t caps_assist;
	int csum_assist;
	int i;

	hvn_set_chim_size(sc, sc->sc_chim_szmax);
	/* Allow an administrative override of the chimney threshold. */
	if (hvn_tx_chimney_size > 0 && hvn_tx_chimney_size < sc->sc_chim_szmax)
		hvn_set_chim_size(sc, hvn_tx_chimney_size);

	caps_assist = 0;
	csum_assist = 0;
	if (sc->sc_caps & HVN_CAPS_IPCS) {
		caps_assist |= IFCAP_CSUM_IPv4_Tx;
		caps_assist |= IFCAP_CSUM_IPv4_Rx;
		csum_assist |= M_CSUM_IPv4;
	}
	if (sc->sc_caps & HVN_CAPS_TCP4CS) {
		caps_assist |= IFCAP_CSUM_TCPv4_Tx;
		caps_assist |= IFCAP_CSUM_TCPv4_Rx;
		csum_assist |= M_CSUM_TCPv4;
	}
	if (sc->sc_caps & HVN_CAPS_TCP6CS) {
		caps_assist |= IFCAP_CSUM_TCPv6_Tx;
		csum_assist |= M_CSUM_TCPv6;
	}
	if (sc->sc_caps & HVN_CAPS_UDP4CS) {
		caps_assist |= IFCAP_CSUM_UDPv4_Tx;
		caps_assist |= IFCAP_CSUM_UDPv4_Rx;
		csum_assist |= M_CSUM_UDPv4;
	}
	if (sc->sc_caps & HVN_CAPS_UDP6CS) {
		caps_assist |= IFCAP_CSUM_UDPv6_Tx;
		csum_assist |= M_CSUM_UDPv6;
	}
	for (i = 0; i < sc->sc_ntxr; i++) {
		txr = &sc->sc_txr[i];
		txr->txr_caps_assist = caps_assist;
		txr->txr_csum_assist = csum_assist;
	}

	if (sc->sc_caps & HVN_CAPS_UDPHASH) {
		for (i = 0; i < sc->sc_ntxr; i++) {
			txr = &sc->sc_txr[i];
			txr->txr_flags |= HVN_TXR_FLAG_UDP_HASH;
		}
	}
}

/* Number of free txdescs on this ring; txr_lock must be held. */
static int
hvn_txd_peek(struct hvn_tx_ring *txr)
{

	KASSERT(mutex_owned(&txr->txr_lock));

	return txr->txr_avail;
}

/* Take a free txdesc off the ring's free list with one reference. */
static struct hvn_tx_desc *
hvn_txd_get(struct hvn_tx_ring *txr)
{
	struct hvn_tx_desc *txd;

	KASSERT(mutex_owned(&txr->txr_lock));

	txd = TAILQ_FIRST(&txr->txr_list);
	KASSERT(txd != NULL);
	TAILQ_REMOVE(&txr->txr_list, txd, txd_entry);
	txr->txr_avail--;

	txd->txd_refs = 1;

	return txd;
}

/*
 * Drop one reference on a txdesc.  On the last reference, release any
 * aggregated txdescs, the chimney slot or DMA map, and the mbuf, then
 * return the txdesc to the free list.
 */
static void
hvn_txd_put(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
{
	struct hvn_softc *sc = txr->txr_softc;
	struct hvn_tx_desc *tmp_txd;

	KASSERT(mutex_owned(&txr->txr_lock));
	KASSERTMSG(!ISSET(txd->txd_flags, HVN_TXD_FLAG_ONAGG),
	    "put an onagg txd %#x", txd->txd_flags);

	KASSERTMSG(txd->txd_refs > 0, "invalid txd refs %d", txd->txd_refs);
	if (atomic_dec_uint_nv(&txd->txd_refs) != 0)
		return;

	if (!STAILQ_EMPTY(&txd->txd_agg_list)) {
		while ((tmp_txd = STAILQ_FIRST(&txd->txd_agg_list)) != NULL) {
			KASSERTMSG(STAILQ_EMPTY(&tmp_txd->txd_agg_list),
			    "resursive aggregation on aggregated txdesc");
			KASSERTMSG(
			    ISSET(tmp_txd->txd_flags, HVN_TXD_FLAG_ONAGG),
			    "not aggregated txdesc");
			KASSERTMSG(
tmp_txd->txd_chim_index == HVN_NVS_CHIM_IDX_INVALID, 2261 "aggregated txdesc consumes chimney sending " 2262 "buffer: idx %u", tmp_txd->txd_chim_index); 2263 KASSERTMSG(tmp_txd->txd_chim_size == 0, 2264 "aggregated txdesc has non-zero chimney sending " 2265 "size: sz %u", tmp_txd->txd_chim_size); 2266 2267 STAILQ_REMOVE_HEAD(&txd->txd_agg_list, txd_agg_entry); 2268 CLR(tmp_txd->txd_flags, HVN_TXD_FLAG_ONAGG); 2269 hvn_txd_put(txr, tmp_txd); 2270 } 2271 } 2272 2273 if (txd->txd_chim_index != HVN_NVS_CHIM_IDX_INVALID) { 2274 KASSERTMSG(!ISSET(txd->txd_flags, HVN_TXD_FLAG_DMAMAP), 2275 "chim txd uses dmamap"); 2276 hvn_chim_free(sc, txd->txd_chim_index); 2277 txd->txd_chim_index = HVN_NVS_CHIM_IDX_INVALID; 2278 txd->txd_chim_size = 0; 2279 } else if (ISSET(txd->txd_flags, HVN_TXD_FLAG_DMAMAP)) { 2280 bus_dmamap_sync(sc->sc_dmat, txd->txd_dmap, 2281 0, txd->txd_dmap->dm_mapsize, 2282 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 2283 bus_dmamap_unload(sc->sc_dmat, txd->txd_dmap); 2284 CLR(txd->txd_flags, HVN_TXD_FLAG_DMAMAP); 2285 } 2286 2287 if (txd->txd_buf != NULL) { 2288 m_freem(txd->txd_buf); 2289 txd->txd_buf = NULL; 2290 } 2291 2292 TAILQ_INSERT_TAIL(&txr->txr_list, txd, txd_entry); 2293 txr->txr_avail++; 2294 txr->txr_oactive = 0; 2295} 2296 2297static void 2298hvn_txd_gc(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd) 2299{ 2300 2301 KASSERTMSG(txd->txd_refs == 0 || txd->txd_refs == 1, 2302 "invalid txd refs %d", txd->txd_refs); 2303 2304 /* Aggregated txds will be freed by their aggregating txd. 
*/ 2305 if (txd->txd_refs > 0 && !ISSET(txd->txd_flags, HVN_TXD_FLAG_ONAGG)) 2306 hvn_txd_put(txr, txd); 2307} 2308 2309static void 2310hvn_txd_hold(struct hvn_tx_desc *txd) 2311{ 2312 2313 /* 0->1 transition will never work */ 2314 KASSERTMSG(txd->txd_refs > 0, "invalid txd refs %d", txd->txd_refs); 2315 2316 atomic_inc_uint(&txd->txd_refs); 2317} 2318 2319static void 2320hvn_txd_agg(struct hvn_tx_desc *agg_txd, struct hvn_tx_desc *txd) 2321{ 2322 2323 KASSERTMSG(!ISSET(agg_txd->txd_flags, HVN_TXD_FLAG_ONAGG), 2324 "recursive aggregation on aggregating txdesc"); 2325 KASSERTMSG(!ISSET(txd->txd_flags, HVN_TXD_FLAG_ONAGG), 2326 "already aggregated"); 2327 KASSERTMSG(STAILQ_EMPTY(&txd->txd_agg_list), 2328 "recursive aggregation on to-be-aggregated txdesc"); 2329 2330 SET(txd->txd_flags, HVN_TXD_FLAG_ONAGG); 2331 STAILQ_INSERT_TAIL(&agg_txd->txd_agg_list, txd, txd_agg_entry); 2332} 2333 2334static int 2335hvn_tx_ring_pending(struct hvn_tx_ring *txr) 2336{ 2337 int pending = 0; 2338 2339 mutex_enter(&txr->txr_lock); 2340 if (hvn_txd_peek(txr) != HVN_TX_DESC) 2341 pending = 1; 2342 mutex_exit(&txr->txr_lock); 2343 2344 return pending; 2345} 2346 2347static void 2348hvn_tx_ring_qflush(struct hvn_softc *sc, struct hvn_tx_ring *txr) 2349{ 2350 struct mbuf *m; 2351 2352 while ((m = pcq_get(txr->txr_interq)) != NULL) 2353 m_freem(m); 2354} 2355 2356static int 2357hvn_get_lladdr(struct hvn_softc *sc, uint8_t *enaddr) 2358{ 2359 size_t addrlen = ETHER_ADDR_LEN; 2360 int rv; 2361 2362 rv = hvn_rndis_query(sc, OID_802_3_PERMANENT_ADDRESS, enaddr, &addrlen); 2363 if (rv == 0 && addrlen != ETHER_ADDR_LEN) 2364 rv = -1; 2365 return rv; 2366} 2367 2368static void 2369hvn_update_link_status(struct hvn_softc *sc) 2370{ 2371 struct ifnet *ifp = SC2IFP(sc); 2372 uint32_t state, old_link_state; 2373 size_t len = sizeof(state); 2374 int rv; 2375 2376 rv = hvn_rndis_query(sc, OID_GEN_MEDIA_CONNECT_STATUS, &state, &len); 2377 if (rv != 0 || len != sizeof(state)) 2378 return; 2379 2380 
old_link_state = sc->sc_link_state; 2381 sc->sc_link_state = (state == NDIS_MEDIA_STATE_CONNECTED) ? 2382 LINK_STATE_UP : LINK_STATE_DOWN; 2383 if (old_link_state != sc->sc_link_state) { 2384 if_link_state_change(ifp, sc->sc_link_state); 2385 } 2386} 2387 2388static int 2389hvn_get_mtu(struct hvn_softc *sc, uint32_t *mtu) 2390{ 2391 size_t mtusz = sizeof(*mtu); 2392 int rv; 2393 2394 rv = hvn_rndis_query(sc, OID_GEN_MAXIMUM_FRAME_SIZE, mtu, &mtusz); 2395 if (rv == 0 && mtusz != sizeof(*mtu)) 2396 rv = -1; 2397 return rv; 2398} 2399 2400static int 2401hvn_channel_attach(struct hvn_softc *sc, struct vmbus_channel *chan) 2402{ 2403 struct hvn_rx_ring *rxr; 2404 struct hvn_tx_ring *txr; 2405 int idx; 2406 2407 idx = chan->ch_subidx; 2408 if (idx < 0 || idx >= sc->sc_nrxr_inuse) { 2409 DPRINTF("%s: invalid sub-channel %u\n", 2410 device_xname(sc->sc_dev), idx); 2411 return -1; 2412 } 2413 2414 rxr = &sc->sc_rxr[idx]; 2415 rxr->rxr_chan = chan; 2416 2417 if (idx < sc->sc_ntxr_inuse) { 2418 txr = &sc->sc_txr[idx]; 2419 txr->txr_chan = chan; 2420 } 2421 2422 /* Bind this channel to a proper CPU. */ 2423 vmbus_channel_cpu_set(chan, HVN_RING_IDX2CPU(sc, idx)); 2424 2425 chan->ch_flags &= ~CHF_BATCHED; 2426 2427 /* Associate our interrupt handler with the channel */ 2428 if (vmbus_channel_open(chan, 2429 HVN_RING_BUFSIZE - sizeof(struct vmbus_bufring), NULL, 0, 2430 hvn_nvs_intr, rxr)) { 2431 DPRINTF("%s: failed to open channel\n", 2432 device_xname(sc->sc_dev)); 2433 return -1; 2434 } 2435 2436 return 0; 2437} 2438 2439static void 2440hvn_channel_detach(struct hvn_softc *sc, struct vmbus_channel *chan) 2441{ 2442 2443 vmbus_channel_close_direct(chan); 2444} 2445 2446static void 2447hvn_channel_detach_all(struct hvn_softc *sc) 2448{ 2449 struct vmbus_channel **subchans; 2450 int i, subchan_cnt = sc->sc_nrxr_inuse - 1; 2451 2452 if (subchan_cnt > 0) { 2453 /* Detach the sub-channels. 
*/ 2454 subchans = vmbus_subchannel_get(sc->sc_prichan, subchan_cnt); 2455 for (i = 0; i < subchan_cnt; i++) 2456 hvn_channel_detach(sc, subchans[i]); 2457 vmbus_subchannel_rel(subchans, subchan_cnt); 2458 } 2459 2460 /* 2461 * Detach the primary channel, _after_ all sub-channels 2462 * are detached. 2463 */ 2464 hvn_channel_detach(sc, sc->sc_prichan); 2465 2466 /* Wait for sub-channels to be destroyed, if any. */ 2467 vmbus_subchannel_drain(sc->sc_prichan); 2468} 2469 2470static int 2471hvn_subchannel_attach(struct hvn_softc *sc) 2472{ 2473 struct vmbus_channel **subchans; 2474 int subchan_cnt = sc->sc_nrxr_inuse - 1; 2475 int i, error = 0; 2476 2477 KASSERTMSG(subchan_cnt > 0, "no sub-channels"); 2478 2479 /* Attach the sub-channels. */ 2480 subchans = vmbus_subchannel_get(sc->sc_prichan, subchan_cnt); 2481 for (i = 0; i < subchan_cnt; ++i) { 2482 int error1; 2483 2484 error1 = hvn_channel_attach(sc, subchans[i]); 2485 if (error1) { 2486 error = error1; 2487 /* Move on; all channels will be detached later. */ 2488 } 2489 } 2490 vmbus_subchannel_rel(subchans, subchan_cnt); 2491 2492 if (error) { 2493 aprint_error_dev(sc->sc_dev, 2494 "sub-channels attach failed: %d\n", error); 2495 return error; 2496 } 2497 2498 aprint_debug_dev(sc->sc_dev, "%d sub-channels attached\n", 2499 subchan_cnt); 2500 return 0; 2501} 2502 2503static int 2504hvn_synth_alloc_subchannels(struct hvn_softc *sc, int *nsubch) 2505{ 2506 struct vmbus_channel **subchans; 2507 int error, nchan, rxr_cnt; 2508 2509 nchan = *nsubch + 1; 2510 if (nchan < 2) { 2511 /* Multiple RX/TX rings are not requested. */ 2512 *nsubch = 0; 2513 return 0; 2514 } 2515 2516 /* 2517 * Query RSS capabilities, e.g. # of RX rings, and # of indirect 2518 * table entries. 2519 */ 2520 if (hvn_get_rsscaps(sc, &rxr_cnt)) { 2521 /* No RSS. 
/*
 * Check whether the synthetic parts of the device may be attached
 * (or re-attached).  The per-ring state check below is currently
 * compiled out, so this always reports success.
 */
static int
hvn_synth_attachable(const struct hvn_softc *sc)
{
#if 0
	const struct hvn_rx_ring *rxr;
	int i;

	for (i = 0; i < sc->sc_nrxr; i++) {
		rxr = &sc->sc_rxr[i];
		if (rxr->rxr_flags)
			return 0;
	}
#endif
	return 1;
}
/*
 * Bring up the synthetic NIC: attach the primary channel, NVS and
 * RNDIS (strictly in that order), negotiate offload capabilities,
 * allocate sub-channels for multi-queue operation and configure RSS.
 * On failure, everything attached so far is torn down in reverse
 * order and the previous capability flags are restored.
 */
static int
hvn_synth_attach(struct hvn_softc *sc, int mtu)
{
	uint8_t rss_key[RSS_KEYSIZE];
	uint32_t old_caps;
	int nchan = 1, nsubch;
	int i, error;

	if (!hvn_synth_attachable(sc))
		return ENXIO;

	/* Save capabilities for later verification. */
	old_caps = sc->sc_caps;
	sc->sc_caps = 0;

	/* Clear RSS stuffs. */
	sc->sc_rss_ind_size = 0;
	sc->sc_rss_hash = 0;
	sc->sc_rss_hcap = 0;

	/*
	 * Attach the primary channel _before_ attaching NVS and RNDIS.
	 */
	error = hvn_channel_attach(sc, sc->sc_prichan);
	if (error) {
		aprint_error_dev(sc->sc_dev,
		    "failed to attach primary channel\n");
		goto failed;
	}

	/*
	 * Attach NVS.
	 */
	error = hvn_nvs_attach(sc, mtu);
	if (error) {
		aprint_error_dev(sc->sc_dev, "failed to init NVSP\n");
		goto detach_channel;
	}

	/*
	 * Attach RNDIS _after_ NVS is attached.
	 */
	error = hvn_rndis_attach(sc, mtu);
	if (error) {
		aprint_error_dev(sc->sc_dev, "failed to init RNDIS\n");
		goto detach_nvs;
	}

	error = hvn_set_capabilities(sc, mtu);
	if (error) {
		aprint_error_dev(sc->sc_dev, "failed to setup offloading\n");
		goto detach_rndis;
	}

	/* On re-attach the host must offer the same capabilities. */
	if ((sc->sc_flags & HVN_SCF_ATTACHED) && old_caps != sc->sc_caps) {
		device_printf(sc->sc_dev, "caps mismatch "
		    "old 0x%08x, new 0x%08x\n", old_caps, sc->sc_caps);
		error = ENXIO;
		goto detach_rndis;
	}

	/*
	 * Allocate sub-channels for multi-TX/RX rings.
	 *
	 * NOTE:
	 * The # of RX rings that can be used is equivalent to the # of
	 * channels to be requested.
	 */
	nsubch = sc->sc_nrxr - 1;
	error = hvn_synth_alloc_subchannels(sc, &nsubch);
	if (error) {
		aprint_error_dev(sc->sc_dev,
		    "failed to allocate sub channels\n");
		goto detach_synth;
	}

	/*
	 * Set the # of TX/RX rings that could be used according to
	 * the # of channels that NVS offered.
	 */
	nchan = nsubch + 1;
	hvn_set_ring_inuse(sc, nchan);

	if (nchan > 1) {
		/*
		 * Attach the sub-channels.
		 *
		 * NOTE: hvn_set_ring_inuse() _must_ have been called.
		 */
		error = hvn_subchannel_attach(sc);
		if (error) {
			aprint_error_dev(sc->sc_dev,
			    "failed to attach sub channels\n");
			goto detach_synth;
		}

		/*
		 * Configure RSS key and indirect table _after_ all sub-channels
		 * are attached.
		 */
		if (!(sc->sc_flags & HVN_SCF_HAS_RSSKEY)) {
			/* Set the default RSS key. */
			CTASSERT(sizeof(sc->sc_rss.rss_key) == sizeof(rss_key));
			rss_getkey(rss_key);
			memcpy(&sc->sc_rss.rss_key, rss_key,
			    sizeof(sc->sc_rss.rss_key));
			sc->sc_flags |= HVN_SCF_HAS_RSSKEY;
		}

		if (!(sc->sc_flags & HVN_SCF_HAS_RSSIND)) {
			/* Setup RSS indirect table in round-robin fashion. */
			for (i = 0; i < NDIS_HASH_INDCNT; i++) {
				sc->sc_rss.rss_ind[i] = i % nchan;
			}
			sc->sc_flags |= HVN_SCF_HAS_RSSIND;
		} else {
			/*
			 * # of usable channels may be changed, so we have to
			 * make sure that all entries in RSS indirect table
			 * are valid.
			 *
			 * NOTE: hvn_set_ring_inuse() _must_ have been called.
			 */
			hvn_fixup_rss_ind(sc);
		}

		sc->sc_rss_hash = sc->sc_rss_hcap;
		error = hvn_set_rss(sc, NDIS_RSS_FLAG_NONE);
		if (error) {
			aprint_error_dev(sc->sc_dev, "failed to setup RSS\n");
			goto detach_synth;
		}
	}

	/*
	 * Fixup transmission aggregation setup.
	 */
	hvn_set_txagg(sc);
	hvn_init_fixat(sc, nchan);
	return 0;

detach_synth:
	hvn_init_fixat(sc, nchan);
	hvn_synth_detach(sc);
	return error;

detach_rndis:
	hvn_init_fixat(sc, nchan);
	hvn_rndis_detach(sc);
detach_nvs:
	hvn_nvs_detach(sc);
detach_channel:
	hvn_channel_detach(sc, sc->sc_prichan);
failed:
	/* Restore old capabilities. */
	sc->sc_caps = old_caps;
	return error;
}
*/ 2836 hvn_nvs_detach(sc); 2837 2838 /* Detach all of the channels. */ 2839 hvn_channel_detach_all(sc); 2840 2841 if (sc->sc_prichan->ch_sc->sc_proto >= VMBUS_VERSION_WIN10 && 2842 sc->sc_rx_hndl) { 2843 /* 2844 * Host is post-Win2016, disconnect RXBUF from primary channel 2845 * here. 2846 */ 2847 vmbus_handle_free(sc->sc_prichan, sc->sc_rx_hndl); 2848 sc->sc_rx_hndl = 0; 2849 } 2850 2851 if (sc->sc_prichan->ch_sc->sc_proto >= VMBUS_VERSION_WIN10 && 2852 sc->sc_chim_hndl) { 2853 /* 2854 * Host is post-Win2016, disconnect chimney sending buffer 2855 * from primary channel here. 2856 */ 2857 vmbus_handle_free(sc->sc_prichan, sc->sc_chim_hndl); 2858 sc->sc_chim_hndl = 0; 2859 } 2860} 2861 2862static void 2863hvn_set_ring_inuse(struct hvn_softc *sc, int ring_cnt) 2864{ 2865 2866 if (sc->sc_ntxr > ring_cnt) 2867 sc->sc_ntxr_inuse = ring_cnt; 2868 else 2869 sc->sc_ntxr_inuse = sc->sc_ntxr; 2870 sc->sc_nrxr_inuse = ring_cnt; 2871} 2872 2873static void 2874hvn_channel_drain(struct hvn_softc *sc, struct vmbus_channel *chan) 2875{ 2876 struct hvn_rx_ring *rxr; 2877 int i, s; 2878 2879 for (rxr = NULL, i = 0; i < sc->sc_nrxr_inuse; i++) { 2880 rxr = &sc->sc_rxr[i]; 2881 if (rxr->rxr_chan == chan) 2882 break; 2883 } 2884 KASSERT(i < sc->sc_nrxr_inuse); 2885 2886 /* 2887 * NOTE: 2888 * The TX bufring will not be drained by the hypervisor, 2889 * if the primary channel is revoked. 
2890 */ 2891 while (!vmbus_channel_rx_empty(chan) || 2892 (!vmbus_channel_is_revoked(sc->sc_prichan) && 2893 !vmbus_channel_tx_empty(chan))) { 2894 DELAY(20); 2895 s = splnet(); 2896 hvn_nvs_intr1(rxr, sc->sc_tx_process_limit, 2897 sc->sc_rx_process_limit); 2898 splx(s); 2899 } 2900 2901 mutex_enter(&rxr->rxr_onwork_lock); 2902 while (rxr->rxr_onlist || rxr->rxr_onproc) 2903 cv_wait(&rxr->rxr_onwork_cv, &rxr->rxr_onwork_lock); 2904 mutex_exit(&rxr->rxr_onwork_lock); 2905} 2906 2907static void 2908hvn_disable_rx(struct hvn_softc *sc) 2909{ 2910 2911 /* 2912 * Disable RX by clearing RX filter forcefully. 2913 */ 2914 (void)hvn_rndis_close(sc); /* ignore error */ 2915 2916 /* 2917 * Give RNDIS enough time to flush all pending data packets. 2918 */ 2919 DELAY(200); 2920} 2921 2922static void 2923hvn_drain_rxtx(struct hvn_softc *sc, int nchan) 2924{ 2925 struct vmbus_channel **subchans = NULL; 2926 int i, nsubch; 2927 2928 /* 2929 * Drain RX/TX bufrings and interrupts. 2930 */ 2931 nsubch = nchan - 1; 2932 if (nsubch > 0) 2933 subchans = vmbus_subchannel_get(sc->sc_prichan, nsubch); 2934 2935 if (subchans != NULL) { 2936 for (i = 0; i < nsubch; ++i) 2937 hvn_channel_drain(sc, subchans[i]); 2938 } 2939 hvn_channel_drain(sc, sc->sc_prichan); 2940 2941 if (subchans != NULL) 2942 vmbus_subchannel_rel(subchans, nsubch); 2943} 2944 2945static void 2946hvn_suspend_data(struct hvn_softc *sc) 2947{ 2948 struct hvn_tx_ring *txr; 2949 int i, s; 2950 2951 /* 2952 * Suspend TX. 2953 */ 2954 for (i = 0; i < sc->sc_ntxr_inuse; i++) { 2955 txr = &sc->sc_txr[i]; 2956 2957 mutex_enter(&txr->txr_lock); 2958 txr->txr_suspended = 1; 2959 mutex_exit(&txr->txr_lock); 2960 /* No one is able send more packets now. */ 2961 2962 /* 2963 * Wait for all pending sends to finish. 2964 * 2965 * NOTE: 2966 * We will _not_ receive all pending send-done, if the 2967 * primary channel is revoked. 
2968 */ 2969 while (hvn_tx_ring_pending(txr) && 2970 !vmbus_channel_is_revoked(sc->sc_prichan)) { 2971 DELAY(20); 2972 s = splnet(); 2973 hvn_nvs_intr1(txr->txr_rxr, sc->sc_tx_process_limit, 2974 sc->sc_rx_process_limit); 2975 splx(s); 2976 } 2977 } 2978 2979 /* 2980 * Disable RX. 2981 */ 2982 hvn_disable_rx(sc); 2983 2984 /* 2985 * Drain RX/TX. 2986 */ 2987 hvn_drain_rxtx(sc, sc->sc_nrxr_inuse); 2988} 2989 2990static void 2991hvn_suspend_mgmt(struct hvn_softc *sc) 2992{ 2993 2994 sc->sc_link_suspend = true; 2995 callout_halt(&sc->sc_link_tmout, NULL); 2996 2997 /* Drain link state task */ 2998 mutex_enter(&sc->sc_link_lock); 2999 for (;;) { 3000 if (!sc->sc_link_onproc) 3001 break; 3002 mutex_exit(&sc->sc_link_lock); 3003 DELAY(20); 3004 mutex_enter(&sc->sc_link_lock); 3005 } 3006 mutex_exit(&sc->sc_link_lock); 3007} 3008 3009static void 3010hvn_suspend(struct hvn_softc *sc) 3011{ 3012 struct ifnet *ifp = SC2IFP(sc); 3013 3014 if (ifp->if_flags & IFF_RUNNING) 3015 hvn_suspend_data(sc); 3016 hvn_suspend_mgmt(sc); 3017} 3018 3019static void 3020hvn_resume_tx(struct hvn_softc *sc, int ring_cnt) 3021{ 3022 struct hvn_tx_ring *txr; 3023 int i; 3024 3025 for (i = 0; i < ring_cnt; i++) { 3026 txr = &sc->sc_txr[i]; 3027 mutex_enter(&txr->txr_lock); 3028 txr->txr_suspended = 0; 3029 mutex_exit(&txr->txr_lock); 3030 } 3031} 3032 3033static void 3034hvn_resume_data(struct hvn_softc *sc) 3035{ 3036 struct ifnet *ifp = SC2IFP(sc); 3037 struct hvn_tx_ring *txr; 3038 int i; 3039 3040 /* 3041 * Re-enable RX. 3042 */ 3043 hvn_rndis_open(sc); 3044 3045 /* 3046 * Make sure to clear suspend status on "all" TX rings, 3047 * since sc_ntxr_inuse can be changed after hvn_suspend_data(). 3048 */ 3049 hvn_resume_tx(sc, sc->sc_ntxr); 3050 3051 /* 3052 * Flush unused mbuf, since sc_ntxr_inuse may be reduced. 3053 */ 3054 for (i = sc->sc_ntxr_inuse; i < sc->sc_ntxr; i++) 3055 hvn_tx_ring_qflush(sc, &sc->sc_txr[i]); 3056 3057 /* 3058 * Kick start TX. 
3059 */ 3060 for (i = 0; i < sc->sc_ntxr_inuse; i++) { 3061 txr = &sc->sc_txr[i]; 3062 mutex_enter(&txr->txr_lock); 3063 txr->txr_oactive = 0; 3064 3065 /* ALTQ */ 3066 if (txr->txr_id == 0) 3067 if_schedule_deferred_start(ifp); 3068 softint_schedule(txr->txr_si); 3069 mutex_exit(&txr->txr_lock); 3070 } 3071} 3072 3073static void 3074hvn_resume_mgmt(struct hvn_softc *sc) 3075{ 3076 3077 sc->sc_link_suspend = false; 3078 hvn_link_event(sc, HVN_LINK_EV_RESUME_NETWORK); 3079} 3080 3081static void 3082hvn_resume(struct hvn_softc *sc) 3083{ 3084 struct ifnet *ifp = SC2IFP(sc); 3085 3086 if (ifp->if_flags & IFF_RUNNING) 3087 hvn_resume_data(sc); 3088 hvn_resume_mgmt(sc); 3089} 3090 3091static int 3092hvn_nvs_init(struct hvn_softc *sc) 3093{ 3094 3095 mutex_init(&sc->sc_nvsrsp_lock, MUTEX_DEFAULT, IPL_NET); 3096 cv_init(&sc->sc_nvsrsp_cv, "nvsrspcv"); 3097 3098 return 0; 3099} 3100 3101static void 3102hvn_nvs_destroy(struct hvn_softc *sc) 3103{ 3104 3105 mutex_destroy(&sc->sc_nvsrsp_lock); 3106 cv_destroy(&sc->sc_nvsrsp_cv); 3107} 3108 3109static int 3110hvn_nvs_doinit(struct hvn_softc *sc, uint32_t proto) 3111{ 3112 struct hvn_nvs_init cmd; 3113 struct hvn_nvs_init_resp *rsp; 3114 uint64_t tid; 3115 int error; 3116 3117 memset(&cmd, 0, sizeof(cmd)); 3118 cmd.nvs_type = HVN_NVS_TYPE_INIT; 3119 cmd.nvs_ver_min = cmd.nvs_ver_max = proto; 3120 3121 tid = atomic_inc_uint_nv(&sc->sc_nvstid); 3122 mutex_enter(&sc->sc_nvsrsp_lock); 3123 error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0); 3124 if (error == 0) { 3125 rsp = (struct hvn_nvs_init_resp *)&sc->sc_nvsrsp; 3126 if (rsp->nvs_status != HVN_NVS_STATUS_OK) 3127 error = EINVAL; 3128 } 3129 mutex_exit(&sc->sc_nvsrsp_lock); 3130 3131 return error; 3132} 3133 3134static int 3135hvn_nvs_conf_ndis(struct hvn_softc *sc, int mtu) 3136{ 3137 struct hvn_nvs_ndis_conf cmd; 3138 uint64_t tid; 3139 int error; 3140 3141 memset(&cmd, 0, sizeof(cmd)); 3142 cmd.nvs_type = HVN_NVS_TYPE_NDIS_CONF; 3143 cmd.nvs_mtu = mtu + ETHER_HDR_LEN; 3144 
cmd.nvs_caps = HVN_NVS_NDIS_CONF_VLAN; 3145 3146 tid = atomic_inc_uint_nv(&sc->sc_nvstid); 3147 mutex_enter(&sc->sc_nvsrsp_lock); 3148 /* NOTE: No response. */ 3149 error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0); 3150 mutex_exit(&sc->sc_nvsrsp_lock); 3151 3152 if (error == 0) 3153 sc->sc_caps |= HVN_CAPS_MTU | HVN_CAPS_VLAN; 3154 return error; 3155} 3156 3157static int 3158hvn_nvs_init_ndis(struct hvn_softc *sc) 3159{ 3160 struct hvn_nvs_ndis_init cmd; 3161 uint64_t tid; 3162 int error; 3163 3164 memset(&cmd, 0, sizeof(cmd)); 3165 cmd.nvs_type = HVN_NVS_TYPE_NDIS_INIT; 3166 cmd.nvs_ndis_major = (sc->sc_ndisver & 0xffff0000) >> 16; 3167 cmd.nvs_ndis_minor = sc->sc_ndisver & 0x0000ffff; 3168 3169 tid = atomic_inc_uint_nv(&sc->sc_nvstid); 3170 mutex_enter(&sc->sc_nvsrsp_lock); 3171 /* NOTE: No response. */ 3172 error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0); 3173 mutex_exit(&sc->sc_nvsrsp_lock); 3174 3175 return error; 3176} 3177 3178static int 3179hvn_nvs_attach(struct hvn_softc *sc, int mtu) 3180{ 3181 static const uint32_t protos[] = { 3182 HVN_NVS_PROTO_VERSION_5, 3183 HVN_NVS_PROTO_VERSION_4, 3184 HVN_NVS_PROTO_VERSION_2, 3185 HVN_NVS_PROTO_VERSION_1 3186 }; 3187 int i; 3188 3189 if (hyperv_ver_major >= 10) 3190 sc->sc_caps |= HVN_CAPS_UDPHASH; 3191 3192 /* 3193 * Initialize NVS. 3194 */ 3195 if (sc->sc_flags & HVN_SCF_ATTACHED) { 3196 /* 3197 * NVS version and NDIS version MUST NOT be changed. 3198 */ 3199 DPRINTF("%s: reinit NVS version %#x, NDIS version %u.%u\n", 3200 device_xname(sc->sc_dev), sc->sc_proto, 3201 (sc->sc_ndisver >> 16), sc->sc_ndisver & 0xffff); 3202 3203 if (hvn_nvs_doinit(sc, sc->sc_proto)) { 3204 DPRINTF("%s: failed to reinit NVSP version %#x\n", 3205 device_xname(sc->sc_dev), sc->sc_proto); 3206 return -1; 3207 } 3208 } else { 3209 /* 3210 * Find the supported NVS version and set NDIS version 3211 * accordingly. 
3212 */ 3213 for (i = 0; i < __arraycount(protos); i++) { 3214 if (hvn_nvs_doinit(sc, protos[i]) == 0) 3215 break; 3216 } 3217 if (i == __arraycount(protos)) { 3218 DPRINTF("%s: failed to negotiate NVSP version\n", 3219 device_xname(sc->sc_dev)); 3220 return -1; 3221 } 3222 3223 sc->sc_proto = protos[i]; 3224 if (sc->sc_proto <= HVN_NVS_PROTO_VERSION_4) 3225 sc->sc_ndisver = NDIS_VERSION_6_1; 3226 else 3227 sc->sc_ndisver = NDIS_VERSION_6_30; 3228 3229 DPRINTF("%s: NVS version %#x, NDIS version %u.%u\n", 3230 device_xname(sc->sc_dev), sc->sc_proto, 3231 (sc->sc_ndisver >> 16), sc->sc_ndisver & 0xffff); 3232 } 3233 3234 if (sc->sc_proto >= HVN_NVS_PROTO_VERSION_5) 3235 sc->sc_caps |= HVN_CAPS_HASHVAL; 3236 3237 if (sc->sc_proto >= HVN_NVS_PROTO_VERSION_2) { 3238 /* 3239 * Configure NDIS before initializing it. 3240 */ 3241 if (hvn_nvs_conf_ndis(sc, mtu)) 3242 return -1; 3243 } 3244 3245 /* 3246 * Initialize NDIS. 3247 */ 3248 if (hvn_nvs_init_ndis(sc)) 3249 return -1; 3250 3251 /* 3252 * Connect RXBUF. 3253 */ 3254 if (hvn_nvs_connect_rxbuf(sc)) 3255 return -1; 3256 3257 /* 3258 * Connect chimney sending buffer. 
/*
 * Connect the RX data buffer to the primary channel: create a GPADL
 * handle for the preallocated buffer and hand it to the host via an
 * NVS RXBUF_CONN request.  On any failure, the partially established
 * connection is torn down again through hvn_nvs_disconnect_rxbuf().
 * Returns 0 on success, -1 on failure.
 */
static int
hvn_nvs_connect_rxbuf(struct hvn_softc *sc)
{
	struct hvn_nvs_rxbuf_conn cmd;
	struct hvn_nvs_rxbuf_conn_resp *rsp;
	uint64_t tid;

	if (vmbus_handle_alloc(sc->sc_prichan, &sc->sc_rx_dma, sc->sc_rx_size,
	    &sc->sc_rx_hndl)) {
		DPRINTF("%s: failed to obtain a PA handle\n",
		    device_xname(sc->sc_dev));
		return -1;
	}

	memset(&cmd, 0, sizeof(cmd));
	cmd.nvs_type = HVN_NVS_TYPE_RXBUF_CONN;
	cmd.nvs_gpadl = sc->sc_rx_hndl;
	cmd.nvs_sig = HVN_NVS_RXBUF_SIG;

	tid = atomic_inc_uint_nv(&sc->sc_nvstid);
	mutex_enter(&sc->sc_nvsrsp_lock);
	if (hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0))
		goto errout;

	/* The interrupt path copied the response into sc_nvsrsp. */
	rsp = (struct hvn_nvs_rxbuf_conn_resp *)&sc->sc_nvsrsp;
	if (rsp->nvs_status != HVN_NVS_STATUS_OK) {
		DPRINTF("%s: failed to set up the Rx ring\n",
		    device_xname(sc->sc_dev));
		goto errout;
	}

	SET(sc->sc_flags, HVN_SCF_RXBUF_CONNECTED);

	/* Only a single RX ring section is supported. */
	if (rsp->nvs_nsect > 1) {
		DPRINTF("%s: invalid number of Rx ring sections: %u\n",
		    device_xname(sc->sc_dev), rsp->nvs_nsect);
		goto errout;
	}
	mutex_exit(&sc->sc_nvsrsp_lock);

	return 0;

 errout:
	mutex_exit(&sc->sc_nvsrsp_lock);
	hvn_nvs_disconnect_rxbuf(sc);
	return -1;
}
/*
 * Connect the chimney (pre-registered copy) sending buffer to the
 * primary channel and carve it into host-sized sections tracked by a
 * free bitmap.  If the host reports an unusable section size, the
 * driver simply continues without chimney sending
 * (sc_chim_szmax == 0).  Returns 0 on success, -1 on failure (with
 * the partial connection torn down via hvn_nvs_disconnect_chim()).
 */
static int
hvn_nvs_connect_chim(struct hvn_softc *sc)
{
	struct hvn_nvs_chim_conn cmd;
	const struct hvn_nvs_chim_conn_resp *rsp;
	uint64_t tid;

	mutex_init(&sc->sc_chim_bmap_lock, MUTEX_DEFAULT, IPL_NET);

	/*
	 * Connect chimney sending buffer GPADL to the primary channel.
	 *
	 * NOTE:
	 * Only primary channel has chimney sending buffer connected to it.
	 * Sub-channels just share this chimney sending buffer.
	 */
	if (vmbus_handle_alloc(sc->sc_prichan, &sc->sc_chim_dma, HVN_CHIM_SIZE,
	    &sc->sc_chim_hndl)) {
		DPRINTF("%s: failed to obtain a PA handle for chimney\n",
		    device_xname(sc->sc_dev));
		return -1;
	}

	memset(&cmd, 0, sizeof(cmd));
	cmd.nvs_type = HVN_NVS_TYPE_CHIM_CONN;
	cmd.nvs_gpadl = sc->sc_chim_hndl;
	cmd.nvs_sig = HVN_NVS_CHIM_SIG;

	tid = atomic_inc_uint_nv(&sc->sc_nvstid);
	mutex_enter(&sc->sc_nvsrsp_lock);
	if (hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0))
		goto errout;

	/* The interrupt path copied the response into sc_nvsrsp. */
	rsp = (struct hvn_nvs_chim_conn_resp *)&sc->sc_nvsrsp;
	if (rsp->nvs_status != HVN_NVS_STATUS_OK) {
		DPRINTF("%s: failed to set up chimney sending buffer\n",
		    device_xname(sc->sc_dev));
		goto errout;
	}

	if (rsp->nvs_sectsz == 0 ||
	    (rsp->nvs_sectsz % sizeof(uint32_t)) != 0) {
		/*
		 * Can't use chimney sending buffer; done!
		 */
		if (rsp->nvs_sectsz == 0) {
			device_printf(sc->sc_dev,
			    "zero chimney sending buffer section size\n");
		} else {
			device_printf(sc->sc_dev,
			    "misaligned chimney sending buffers,"
			    " section size: %d", rsp->nvs_sectsz);
		}
		sc->sc_chim_szmax = 0;
		sc->sc_chim_cnt = 0;
	} else {
		sc->sc_chim_szmax = rsp->nvs_sectsz;
		sc->sc_chim_cnt = HVN_CHIM_SIZE / sc->sc_chim_szmax;
	}

	if (sc->sc_chim_szmax > 0) {
		if ((HVN_CHIM_SIZE % sc->sc_chim_szmax) != 0) {
			device_printf(sc->sc_dev,
			    "chimney sending sections are not properly "
			    "aligned\n");
		}
		/* Sections beyond a whole bitmap word are never used. */
		if ((sc->sc_chim_cnt % LONG_BIT) != 0) {
			device_printf(sc->sc_dev,
			    "discard %d chimney sending sections\n",
			    sc->sc_chim_cnt % LONG_BIT);
		}

		sc->sc_chim_bmap_cnt = sc->sc_chim_cnt / LONG_BIT;
		sc->sc_chim_bmap = kmem_zalloc(sc->sc_chim_bmap_cnt *
		    sizeof(u_long), KM_SLEEP);
	}

	/* Done! */
	SET(sc->sc_flags, HVN_SCF_CHIM_CONNECTED);

	aprint_verbose_dev(sc->sc_dev, "chimney sending buffer %d/%d\n",
	    sc->sc_chim_szmax, sc->sc_chim_cnt);

	mutex_exit(&sc->sc_nvsrsp_lock);

	return 0;

errout:
	mutex_exit(&sc->sc_nvsrsp_lock);
	hvn_nvs_disconnect_chim(sc);
	return -1;
}
/*
 * Drain and dispatch the NVS packets pending on the channel bound to
 * rxr.  Completion packets either carry responses to NVS requests
 * (copied into sc_nvsrsp and signalled to the waiting thread) or
 * RNDIS TX acknowledgements; RXBUF packets carry inbound RNDIS data.
 * Processing stops when the ring is empty or once txlimit/rxlimit
 * (0 = unlimited) packets have been handled.  The result has
 * HVN_HANDLE_RING_DOTX set when a TX completion was processed, so the
 * caller can kick the transmit path.
 */
static int
hvn_handle_ring(struct hvn_rx_ring *rxr, int txlimit, int rxlimit)
{
	struct hvn_softc *sc = rxr->rxr_softc;
	struct vmbus_chanpkt_hdr *cph;
	const struct hvn_nvs_hdr *nvs;
	uint64_t rid;
	uint32_t rlen;
	int n, tx = 0, rx = 0;
	int result = 0;
	int rv;

	mutex_enter(&rxr->rxr_lock);
	for (;;) {
		rv = vmbus_channel_recv(rxr->rxr_chan, rxr->rxr_nvsbuf,
		    HVN_NVS_BUFSIZE, &rlen, &rid, 1);
		if (rv != 0 || rlen == 0) {
			/* EAGAIN simply means the ring is empty. */
			if (rv != EAGAIN)
				device_printf(sc->sc_dev,
				    "failed to receive an NVSP packet\n");
			break;
		}
		cph = (struct vmbus_chanpkt_hdr *)rxr->rxr_nvsbuf;
		nvs = (const struct hvn_nvs_hdr *)VMBUS_CHANPKT_CONST_DATA(cph);

		if (cph->cph_type == VMBUS_CHANPKT_TYPE_COMP) {
			switch (nvs->nvs_type) {
			case HVN_NVS_TYPE_INIT_RESP:
			case HVN_NVS_TYPE_RXBUF_CONNRESP:
			case HVN_NVS_TYPE_CHIM_CONNRESP:
			case HVN_NVS_TYPE_SUBCH_RESP:
				mutex_enter(&sc->sc_nvsrsp_lock);
				/* copy the response back */
				memcpy(&sc->sc_nvsrsp, nvs, HVN_NVS_MSGSIZE);
				sc->sc_nvsdone = 1;
				cv_signal(&sc->sc_nvsrsp_cv);
				mutex_exit(&sc->sc_nvsrsp_lock);
				break;
			case HVN_NVS_TYPE_RNDIS_ACK:
				if (rxr->rxr_txr == NULL)
					break;

				result |= HVN_HANDLE_RING_DOTX;
				mutex_enter(&rxr->rxr_txr->txr_lock);
				hvn_txeof(rxr->rxr_txr, cph->cph_tid);
				mutex_exit(&rxr->rxr_txr->txr_lock);
				if (txlimit > 0 && ++tx >= txlimit)
					goto out;
				break;
			default:
				device_printf(sc->sc_dev,
				    "unhandled NVSP packet type %u "
				    "on completion\n", nvs->nvs_type);
				break;
			}
		} else if (cph->cph_type == VMBUS_CHANPKT_TYPE_RXBUF) {
			switch (nvs->nvs_type) {
			case HVN_NVS_TYPE_RNDIS:
				/* n is the number of packets delivered;
				 * negative means stop processing. */
				n = hvn_rndis_input(rxr, cph->cph_tid, cph);
				if (rxlimit > 0) {
					if (n < 0)
						goto out;
					rx += n;
					if (rx >= rxlimit)
						goto out;
				}
				break;
			default:
				device_printf(sc->sc_dev,
				    "unhandled NVSP packet type %u "
				    "on receive\n", nvs->nvs_type);
				break;
			}
		} else if (cph->cph_type == VMBUS_CHANPKT_TYPE_INBAND) {
			switch (nvs->nvs_type) {
			case HVN_NVS_TYPE_TXTBL_NOTE:
				/* Useless; ignore */
				break;
			default:
				device_printf(sc->sc_dev,
				    "got notify, nvs type %u\n", nvs->nvs_type);
				break;
			}
		} else
			device_printf(sc->sc_dev,
			    "unknown NVSP packet type %u\n", cph->cph_type);
	}
out:
	mutex_exit(&rxr->rxr_lock);

	return result;
}
&rxr->rxr_wk, NULL); 3661 } 3662 } else { 3663 rxr->rxr_onlist = true; 3664 if (intr) 3665 rxr->rxr_evdeferreq.ev_count++; 3666 else 3667 rxr->rxr_evredeferreq.ev_count++; 3668 softint_schedule(rxr->rxr_si); 3669 } 3670} 3671 3672static void 3673hvn_handle_ring_common(struct hvn_rx_ring *rxr) 3674{ 3675 struct hvn_softc *sc = rxr->rxr_softc; 3676 int txlimit = sc->sc_tx_process_limit; 3677 int rxlimit = sc->sc_rx_process_limit; 3678 3679 rxr->rxr_evdefer.ev_count++; 3680 3681 mutex_enter(&rxr->rxr_onwork_lock); 3682 rxr->rxr_onproc = true; 3683 rxr->rxr_onlist = false; 3684 mutex_exit(&rxr->rxr_onwork_lock); 3685 3686 hvn_nvs_intr1(rxr, txlimit, rxlimit); 3687 3688 mutex_enter(&rxr->rxr_onwork_lock); 3689 if (vmbus_channel_unpause(rxr->rxr_chan)) { 3690 vmbus_channel_pause(rxr->rxr_chan); 3691 hvn_schedule_handle_ring(sc, rxr, false); 3692 } 3693 rxr->rxr_onproc = false; 3694 cv_broadcast(&rxr->rxr_onwork_cv); 3695 mutex_exit(&rxr->rxr_onwork_lock); 3696} 3697 3698static void 3699hvn_handle_ring_work(struct work *wk, void *arg) 3700{ 3701 struct hvn_rx_ring *rxr = container_of(wk, struct hvn_rx_ring, rxr_wk); 3702 3703 hvn_handle_ring_common(rxr); 3704} 3705 3706static void 3707hvn_nvs_softintr(void *arg) 3708{ 3709 struct hvn_rx_ring *rxr = arg; 3710 3711 hvn_handle_ring_common(rxr); 3712} 3713 3714static void 3715hvn_nvs_intr(void *arg) 3716{ 3717 struct hvn_rx_ring *rxr = arg; 3718 struct hvn_softc *sc = rxr->rxr_softc; 3719 int txlimit = cold ? 0 : sc->sc_tx_intr_process_limit; 3720 int rxlimit = cold ? 
0 : sc->sc_rx_intr_process_limit; 3721 3722 rxr->rxr_evintr.ev_count++; 3723 3724 KASSERT(!rxr->rxr_onproc); 3725 KASSERT(!rxr->rxr_onlist); 3726 3727 vmbus_channel_pause(rxr->rxr_chan); 3728 3729 hvn_nvs_intr1(rxr, txlimit, rxlimit); 3730 3731 if (vmbus_channel_unpause(rxr->rxr_chan) && !cold) { 3732 vmbus_channel_pause(rxr->rxr_chan); 3733 mutex_enter(&rxr->rxr_onwork_lock); 3734 hvn_schedule_handle_ring(sc, rxr, true); 3735 mutex_exit(&rxr->rxr_onwork_lock); 3736 } 3737} 3738 3739static int 3740hvn_nvs_cmd(struct hvn_softc *sc, void *cmd, size_t cmdsize, uint64_t tid, 3741 u_int flags) 3742{ 3743 struct hvn_rx_ring *rxr = &sc->sc_rxr[0]; /* primary channel */ 3744 struct hvn_nvs_hdr *hdr = cmd; 3745 int tries = 10; 3746 int rv, s; 3747 3748 KASSERT(mutex_owned(&sc->sc_nvsrsp_lock)); 3749 3750 sc->sc_nvsdone = 0; 3751 3752 do { 3753 rv = vmbus_channel_send(rxr->rxr_chan, cmd, cmdsize, 3754 tid, VMBUS_CHANPKT_TYPE_INBAND, 3755 ISSET(flags, HVN_NVS_CMD_NORESP) ? 0 : 3756 VMBUS_CHANPKT_FLAG_RC); 3757 if (rv == EAGAIN) { 3758 DELAY(1000); 3759 } else if (rv) { 3760 DPRINTF("%s: NVSP operation %u send error %d\n", 3761 device_xname(sc->sc_dev), hdr->nvs_type, rv); 3762 return rv; 3763 } 3764 } while (rv != 0 && --tries > 0); 3765 3766 if (tries == 0 && rv != 0) { 3767 device_printf(sc->sc_dev, 3768 "NVSP operation %u send error %d\n", hdr->nvs_type, rv); 3769 return rv; 3770 } 3771 3772 if (ISSET(flags, HVN_NVS_CMD_NORESP)) 3773 return 0; 3774 3775 while (!sc->sc_nvsdone && !ISSET(sc->sc_flags, HVN_SCF_REVOKED)) { 3776 mutex_exit(&sc->sc_nvsrsp_lock); 3777 DELAY(1000); 3778 s = splnet(); 3779 hvn_nvs_intr1(rxr, 0, 0); 3780 splx(s); 3781 mutex_enter(&sc->sc_nvsrsp_lock); 3782 } 3783 3784 return 0; 3785} 3786 3787static int 3788hvn_nvs_ack(struct hvn_rx_ring *rxr, uint64_t tid) 3789{ 3790 struct hvn_softc *sc __unused = rxr->rxr_softc; 3791 struct hvn_nvs_rndis_ack cmd; 3792 int tries = 5; 3793 int rv; 3794 3795 cmd.nvs_type = HVN_NVS_TYPE_RNDIS_ACK; 3796 cmd.nvs_status 
= HVN_NVS_STATUS_OK; 3797 do { 3798 rv = vmbus_channel_send(rxr->rxr_chan, &cmd, sizeof(cmd), 3799 tid, VMBUS_CHANPKT_TYPE_COMP, 0); 3800 if (rv == EAGAIN) 3801 DELAY(10); 3802 else if (rv) { 3803 DPRINTF("%s: NVSP acknowledgement error %d\n", 3804 device_xname(sc->sc_dev), rv); 3805 return rv; 3806 } 3807 } while (rv != 0 && --tries > 0); 3808 return rv; 3809} 3810 3811static void 3812hvn_nvs_detach(struct hvn_softc *sc) 3813{ 3814 3815 hvn_nvs_disconnect_rxbuf(sc); 3816 hvn_nvs_disconnect_chim(sc); 3817} 3818 3819static int 3820hvn_nvs_alloc_subchannels(struct hvn_softc *sc, int *nsubchp) 3821{ 3822 struct hvn_nvs_subch_req cmd; 3823 struct hvn_nvs_subch_resp *rsp; 3824 uint64_t tid; 3825 int nsubch, nsubch_req; 3826 3827 nsubch_req = *nsubchp; 3828 KASSERTMSG(nsubch_req > 0, "invalid # of sub-channels %d", nsubch_req); 3829 3830 memset(&cmd, 0, sizeof(cmd)); 3831 cmd.nvs_type = HVN_NVS_TYPE_SUBCH_REQ; 3832 cmd.nvs_op = HVN_NVS_SUBCH_OP_ALLOC; 3833 cmd.nvs_nsubch = nsubch_req; 3834 3835 tid = atomic_inc_uint_nv(&sc->sc_nvstid); 3836 mutex_enter(&sc->sc_nvsrsp_lock); 3837 if (hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0)) { 3838 mutex_exit(&sc->sc_nvsrsp_lock); 3839 return EIO; 3840 } 3841 3842 rsp = (struct hvn_nvs_subch_resp *)&sc->sc_nvsrsp; 3843 if (rsp->nvs_status != HVN_NVS_STATUS_OK) { 3844 mutex_exit(&sc->sc_nvsrsp_lock); 3845 DPRINTF("%s: failed to alloc sub-channels\n", 3846 device_xname(sc->sc_dev)); 3847 return EIO; 3848 } 3849 3850 nsubch = rsp->nvs_nsubch; 3851 if (nsubch > nsubch_req) { 3852 aprint_debug_dev(sc->sc_dev, 3853 "%u subchans are allocated, requested %d\n", 3854 nsubch, nsubch_req); 3855 nsubch = nsubch_req; 3856 } 3857 mutex_exit(&sc->sc_nvsrsp_lock); 3858 3859 *nsubchp = nsubch; 3860 3861 return 0; 3862} 3863 3864static inline struct rndis_cmd * 3865hvn_alloc_cmd(struct hvn_softc *sc) 3866{ 3867 struct rndis_cmd *rc; 3868 3869 mutex_enter(&sc->sc_cntl_fqlck); 3870 while ((rc = TAILQ_FIRST(&sc->sc_cntl_fq)) == NULL) 3871 
cv_wait(&sc->sc_cntl_fqcv, &sc->sc_cntl_fqlck); 3872 TAILQ_REMOVE(&sc->sc_cntl_fq, rc, rc_entry); 3873 mutex_exit(&sc->sc_cntl_fqlck); 3874 return rc; 3875} 3876 3877static inline void 3878hvn_submit_cmd(struct hvn_softc *sc, struct rndis_cmd *rc) 3879{ 3880 3881 mutex_enter(&sc->sc_cntl_sqlck); 3882 TAILQ_INSERT_TAIL(&sc->sc_cntl_sq, rc, rc_entry); 3883 mutex_exit(&sc->sc_cntl_sqlck); 3884} 3885 3886static inline struct rndis_cmd * 3887hvn_complete_cmd(struct hvn_softc *sc, uint32_t id) 3888{ 3889 struct rndis_cmd *rc; 3890 3891 mutex_enter(&sc->sc_cntl_sqlck); 3892 TAILQ_FOREACH(rc, &sc->sc_cntl_sq, rc_entry) { 3893 if (rc->rc_id == id) { 3894 TAILQ_REMOVE(&sc->sc_cntl_sq, rc, rc_entry); 3895 break; 3896 } 3897 } 3898 mutex_exit(&sc->sc_cntl_sqlck); 3899 if (rc != NULL) { 3900 mutex_enter(&sc->sc_cntl_cqlck); 3901 TAILQ_INSERT_TAIL(&sc->sc_cntl_cq, rc, rc_entry); 3902 mutex_exit(&sc->sc_cntl_cqlck); 3903 } 3904 return rc; 3905} 3906 3907static inline void 3908hvn_release_cmd(struct hvn_softc *sc, struct rndis_cmd *rc) 3909{ 3910 3911 mutex_enter(&sc->sc_cntl_cqlck); 3912 TAILQ_REMOVE(&sc->sc_cntl_cq, rc, rc_entry); 3913 mutex_exit(&sc->sc_cntl_cqlck); 3914} 3915 3916static inline int 3917hvn_rollback_cmd(struct hvn_softc *sc, struct rndis_cmd *rc) 3918{ 3919 struct rndis_cmd *rn; 3920 3921 mutex_enter(&sc->sc_cntl_sqlck); 3922 TAILQ_FOREACH(rn, &sc->sc_cntl_sq, rc_entry) { 3923 if (rn == rc) { 3924 TAILQ_REMOVE(&sc->sc_cntl_sq, rc, rc_entry); 3925 mutex_exit(&sc->sc_cntl_sqlck); 3926 return 0; 3927 } 3928 } 3929 mutex_exit(&sc->sc_cntl_sqlck); 3930 return -1; 3931} 3932 3933static inline void 3934hvn_free_cmd(struct hvn_softc *sc, struct rndis_cmd *rc) 3935{ 3936 3937 memset(rc->rc_req, 0, sizeof(struct rndis_packet_msg)); 3938 memset(&rc->rc_cmp, 0, sizeof(rc->rc_cmp)); 3939 memset(&rc->rc_msg, 0, sizeof(rc->rc_msg)); 3940 mutex_enter(&sc->sc_cntl_fqlck); 3941 TAILQ_INSERT_TAIL(&sc->sc_cntl_fq, rc, rc_entry); 3942 cv_signal(&sc->sc_cntl_fqcv); 3943 
mutex_exit(&sc->sc_cntl_fqlck); 3944} 3945 3946static int 3947hvn_rndis_init(struct hvn_softc *sc) 3948{ 3949 struct rndis_cmd *rc; 3950 int i; 3951 3952 /* RNDIS control message queues */ 3953 TAILQ_INIT(&sc->sc_cntl_sq); 3954 TAILQ_INIT(&sc->sc_cntl_cq); 3955 TAILQ_INIT(&sc->sc_cntl_fq); 3956 mutex_init(&sc->sc_cntl_sqlck, MUTEX_DEFAULT, IPL_NET); 3957 mutex_init(&sc->sc_cntl_cqlck, MUTEX_DEFAULT, IPL_NET); 3958 mutex_init(&sc->sc_cntl_fqlck, MUTEX_DEFAULT, IPL_NET); 3959 cv_init(&sc->sc_cntl_fqcv, "nvsalloc"); 3960 3961 for (i = 0; i < HVN_RNDIS_CTLREQS; i++) { 3962 rc = &sc->sc_cntl_msgs[i]; 3963 if (bus_dmamap_create(sc->sc_dmat, PAGE_SIZE, 1, PAGE_SIZE, 0, 3964 BUS_DMA_WAITOK, &rc->rc_dmap)) { 3965 DPRINTF("%s: failed to create RNDIS command map\n", 3966 device_xname(sc->sc_dev)); 3967 goto errout; 3968 } 3969 if (bus_dmamem_alloc(sc->sc_dmat, PAGE_SIZE, PAGE_SIZE, 3970 0, &rc->rc_segs, 1, &rc->rc_nsegs, BUS_DMA_WAITOK)) { 3971 DPRINTF("%s: failed to allocate RNDIS command\n", 3972 device_xname(sc->sc_dev)); 3973 bus_dmamap_destroy(sc->sc_dmat, rc->rc_dmap); 3974 goto errout; 3975 } 3976 if (bus_dmamem_map(sc->sc_dmat, &rc->rc_segs, rc->rc_nsegs, 3977 PAGE_SIZE, (void **)&rc->rc_req, BUS_DMA_WAITOK)) { 3978 DPRINTF("%s: failed to allocate RNDIS command\n", 3979 device_xname(sc->sc_dev)); 3980 bus_dmamem_free(sc->sc_dmat, &rc->rc_segs, 3981 rc->rc_nsegs); 3982 bus_dmamap_destroy(sc->sc_dmat, rc->rc_dmap); 3983 goto errout; 3984 } 3985 memset(rc->rc_req, 0, PAGE_SIZE); 3986 if (bus_dmamap_load(sc->sc_dmat, rc->rc_dmap, rc->rc_req, 3987 PAGE_SIZE, NULL, BUS_DMA_WAITOK)) { 3988 DPRINTF("%s: failed to load RNDIS command map\n", 3989 device_xname(sc->sc_dev)); 3990 bus_dmamem_unmap(sc->sc_dmat, rc->rc_req, PAGE_SIZE); 3991 rc->rc_req = NULL; 3992 bus_dmamem_free(sc->sc_dmat, &rc->rc_segs, 3993 rc->rc_nsegs); 3994 bus_dmamap_destroy(sc->sc_dmat, rc->rc_dmap); 3995 goto errout; 3996 } 3997 rc->rc_gpa = atop(rc->rc_dmap->dm_segs[0].ds_addr); 3998 
mutex_init(&rc->rc_lock, MUTEX_DEFAULT, IPL_NET); 3999 cv_init(&rc->rc_cv, "rndiscmd"); 4000 TAILQ_INSERT_TAIL(&sc->sc_cntl_fq, rc, rc_entry); 4001 } 4002 4003 /* Initialize RNDIS Data command */ 4004 memset(&sc->sc_data_msg, 0, sizeof(sc->sc_data_msg)); 4005 sc->sc_data_msg.nvs_type = HVN_NVS_TYPE_RNDIS; 4006 sc->sc_data_msg.nvs_rndis_mtype = HVN_NVS_RNDIS_MTYPE_DATA; 4007 sc->sc_data_msg.nvs_chim_idx = HVN_NVS_CHIM_IDX_INVALID; 4008 4009 return 0; 4010 4011errout: 4012 hvn_rndis_destroy(sc); 4013 return -1; 4014} 4015 4016static void 4017hvn_rndis_destroy(struct hvn_softc *sc) 4018{ 4019 struct rndis_cmd *rc; 4020 int i; 4021 4022 for (i = 0; i < HVN_RNDIS_CTLREQS; i++) { 4023 rc = &sc->sc_cntl_msgs[i]; 4024 if (rc->rc_req == NULL) 4025 continue; 4026 4027 TAILQ_REMOVE(&sc->sc_cntl_fq, rc, rc_entry); 4028 bus_dmamap_unload(sc->sc_dmat, rc->rc_dmap); 4029 bus_dmamem_unmap(sc->sc_dmat, rc->rc_req, PAGE_SIZE); 4030 rc->rc_req = NULL; 4031 bus_dmamem_free(sc->sc_dmat, &rc->rc_segs, rc->rc_nsegs); 4032 bus_dmamap_destroy(sc->sc_dmat, rc->rc_dmap); 4033 mutex_destroy(&rc->rc_lock); 4034 cv_destroy(&rc->rc_cv); 4035 } 4036 4037 mutex_destroy(&sc->sc_cntl_sqlck); 4038 mutex_destroy(&sc->sc_cntl_cqlck); 4039 mutex_destroy(&sc->sc_cntl_fqlck); 4040 cv_destroy(&sc->sc_cntl_fqcv); 4041} 4042 4043static int 4044hvn_rndis_attach(struct hvn_softc *sc, int mtu) 4045{ 4046 struct rndis_init_req *req; 4047 struct rndis_init_comp *cmp; 4048 struct rndis_cmd *rc; 4049 int rv; 4050 4051 rc = hvn_alloc_cmd(sc); 4052 4053 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE, 4054 BUS_DMASYNC_PREREAD); 4055 4056 rc->rc_id = atomic_inc_uint_nv(&sc->sc_rndisrid); 4057 4058 req = rc->rc_req; 4059 req->rm_type = REMOTE_NDIS_INITIALIZE_MSG; 4060 req->rm_len = sizeof(*req); 4061 req->rm_rid = rc->rc_id; 4062 req->rm_ver_major = RNDIS_VERSION_MAJOR; 4063 req->rm_ver_minor = RNDIS_VERSION_MINOR; 4064 req->rm_max_xfersz = HVN_RNDIS_XFER_SIZE; 4065 4066 rc->rc_cmplen = sizeof(*cmp); 4067 4068 
bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE, 4069 BUS_DMASYNC_PREWRITE); 4070 4071 if ((rv = hvn_rndis_cmd(sc, rc, 0)) != 0) { 4072 DPRINTF("%s: INITIALIZE_MSG failed, error %d\n", 4073 device_xname(sc->sc_dev), rv); 4074 hvn_free_cmd(sc, rc); 4075 return -1; 4076 } 4077 cmp = (struct rndis_init_comp *)&rc->rc_cmp; 4078 if (cmp->rm_status != RNDIS_STATUS_SUCCESS) { 4079 DPRINTF("%s: failed to init RNDIS, error %#x\n", 4080 device_xname(sc->sc_dev), cmp->rm_status); 4081 hvn_free_cmd(sc, rc); 4082 return -1; 4083 } 4084 4085 sc->sc_rndis_agg_size = cmp->rm_pktmaxsz; 4086 sc->sc_rndis_agg_pkts = cmp->rm_pktmaxcnt; 4087 sc->sc_rndis_agg_align = __BIT(cmp->rm_align); 4088 4089 if (sc->sc_rndis_agg_align < sizeof(uint32_t)) { 4090 /* 4091 * The RNDIS packet message encap assumes that the RNDIS 4092 * packet message is at least 4 bytes aligned. Fix up the 4093 * alignment here, if the remote side sets the alignment 4094 * too low. 4095 */ 4096 aprint_verbose_dev(sc->sc_dev, 4097 "fixup RNDIS aggpkt align: %u -> %zu\n", 4098 sc->sc_rndis_agg_align, sizeof(uint32_t)); 4099 sc->sc_rndis_agg_align = sizeof(uint32_t); 4100 } 4101 4102 aprint_verbose_dev(sc->sc_dev, 4103 "RNDIS ver %u.%u, aggpkt size %u, aggpkt cnt %u, aggpkt align %u\n", 4104 cmp->rm_ver_major, cmp->rm_ver_minor, sc->sc_rndis_agg_size, 4105 sc->sc_rndis_agg_pkts, sc->sc_rndis_agg_align); 4106 4107 hvn_free_cmd(sc, rc); 4108 4109 return 0; 4110} 4111 4112static int 4113hvn_get_rsscaps(struct hvn_softc *sc, int *nrxr) 4114{ 4115 struct ndis_rss_caps in, caps; 4116 size_t caps_len; 4117 int error, rxr_cnt, indsz, hash_fnidx; 4118 uint32_t hash_func = 0, hash_types = 0; 4119 4120 *nrxr = 0; 4121 4122 if (sc->sc_ndisver < NDIS_VERSION_6_20) 4123 return EOPNOTSUPP; 4124 4125 memset(&in, 0, sizeof(in)); 4126 in.ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_CAPS; 4127 in.ndis_hdr.ndis_rev = NDIS_RSS_CAPS_REV_2; 4128 in.ndis_hdr.ndis_size = NDIS_RSS_CAPS_SIZE; 4129 4130 caps_len = NDIS_RSS_CAPS_SIZE; 4131 error = 
hvn_rndis_query2(sc, OID_GEN_RECEIVE_SCALE_CAPABILITIES, 4132 &in, NDIS_RSS_CAPS_SIZE, &caps, &caps_len, NDIS_RSS_CAPS_SIZE_6_0); 4133 if (error) 4134 return error; 4135 4136 /* 4137 * Preliminary verification. 4138 */ 4139 if (caps.ndis_hdr.ndis_type != NDIS_OBJTYPE_RSS_CAPS) { 4140 DPRINTF("%s: invalid NDIS objtype 0x%02x\n", 4141 device_xname(sc->sc_dev), caps.ndis_hdr.ndis_type); 4142 return EINVAL; 4143 } 4144 if (caps.ndis_hdr.ndis_rev < NDIS_RSS_CAPS_REV_1) { 4145 DPRINTF("%s: invalid NDIS objrev 0x%02x\n", 4146 device_xname(sc->sc_dev), caps.ndis_hdr.ndis_rev); 4147 return EINVAL; 4148 } 4149 if (caps.ndis_hdr.ndis_size > caps_len) { 4150 DPRINTF("%s: invalid NDIS objsize %u, data size %zu\n", 4151 device_xname(sc->sc_dev), caps.ndis_hdr.ndis_size, 4152 caps_len); 4153 return EINVAL; 4154 } else if (caps.ndis_hdr.ndis_size < NDIS_RSS_CAPS_SIZE_6_0) { 4155 DPRINTF("%s: invalid NDIS objsize %u\n", 4156 device_xname(sc->sc_dev), caps.ndis_hdr.ndis_size); 4157 return EINVAL; 4158 } 4159 4160 /* 4161 * Save information for later RSS configuration. 
4162 */ 4163 if (caps.ndis_nrxr == 0) { 4164 DPRINTF("%s: 0 RX rings!?\n", device_xname(sc->sc_dev)); 4165 return EINVAL; 4166 } 4167 rxr_cnt = caps.ndis_nrxr; 4168 aprint_debug_dev(sc->sc_dev, "%u Rx rings\n", rxr_cnt); 4169 4170 if (caps.ndis_hdr.ndis_size == NDIS_RSS_CAPS_SIZE && 4171 caps.ndis_hdr.ndis_rev >= NDIS_RSS_CAPS_REV_2) { 4172 if (caps.ndis_nind > NDIS_HASH_INDCNT) { 4173 DPRINTF("%s: too many RSS indirect table entries %u\n", 4174 device_xname(sc->sc_dev), caps.ndis_nind); 4175 return EOPNOTSUPP; 4176 } 4177 if (!powerof2(caps.ndis_nind)) { 4178 DPRINTF("%s: RSS indirect table size is not power-of-2:" 4179 " %u\n", device_xname(sc->sc_dev), caps.ndis_nind); 4180 return EOPNOTSUPP; 4181 } 4182 4183 indsz = caps.ndis_nind; 4184 } else { 4185 indsz = NDIS_HASH_INDCNT; 4186 } 4187 if (rxr_cnt > indsz) { 4188 aprint_debug_dev(sc->sc_dev, 4189 "# of RX rings (%u) > RSS indirect table size %u\n", 4190 rxr_cnt, indsz); 4191 rxr_cnt = indsz; 4192 } 4193 4194 /* 4195 * NOTE: 4196 * Toeplitz is at the lowest bit, and it is prefered; so ffs(), 4197 * instead of fls(), is used here. 
4198 */ 4199 hash_fnidx = ffs(caps.ndis_caps & NDIS_RSS_CAP_HASHFUNC_MASK); 4200 if (hash_fnidx == 0) { 4201 DPRINTF("%s: no hash functions, caps 0x%08x\n", 4202 device_xname(sc->sc_dev), caps.ndis_caps); 4203 return EOPNOTSUPP; 4204 } 4205 hash_func = 1 << (hash_fnidx - 1); /* ffs is 1-based */ 4206 4207 if (caps.ndis_caps & NDIS_RSS_CAP_IPV4) 4208 hash_types |= NDIS_HASH_IPV4 | NDIS_HASH_TCP_IPV4; 4209 if (caps.ndis_caps & NDIS_RSS_CAP_IPV6) 4210 hash_types |= NDIS_HASH_IPV6 | NDIS_HASH_TCP_IPV6; 4211 if (caps.ndis_caps & NDIS_RSS_CAP_IPV6_EX) 4212 hash_types |= NDIS_HASH_IPV6_EX | NDIS_HASH_TCP_IPV6_EX; 4213 if (hash_types == 0) { 4214 DPRINTF("%s: no hash types, caps 0x%08x\n", 4215 device_xname(sc->sc_dev), caps.ndis_caps); 4216 return EOPNOTSUPP; 4217 } 4218 aprint_debug_dev(sc->sc_dev, "RSS caps %#x\n", caps.ndis_caps); 4219 4220 sc->sc_rss_ind_size = indsz; 4221 sc->sc_rss_hcap = hash_func | hash_types; 4222 if (sc->sc_caps & HVN_CAPS_UDPHASH) { 4223 /* UDP 4-tuple hash is unconditionally enabled. */ 4224 sc->sc_rss_hcap |= NDIS_HASH_UDP_IPV4_X; 4225 } 4226 *nrxr = rxr_cnt; 4227 4228 return 0; 4229} 4230 4231static int 4232hvn_set_rss(struct hvn_softc *sc, uint16_t flags) 4233{ 4234 struct ndis_rssprm_toeplitz *rss = &sc->sc_rss; 4235 struct ndis_rss_params *params = &rss->rss_params; 4236 int len; 4237 4238 /* 4239 * Only NDIS 6.20+ is supported: 4240 * We only support 4bytes element in indirect table, which has been 4241 * adopted since NDIS 6.20. 4242 */ 4243 if (sc->sc_ndisver < NDIS_VERSION_6_20) 4244 return 0; 4245 4246 /* XXX only one can be specified through, popcnt? 
*/ 4247 KASSERTMSG((sc->sc_rss_hash & NDIS_HASH_FUNCTION_MASK), 4248 "no hash func %08x", sc->sc_rss_hash); 4249 KASSERTMSG((sc->sc_rss_hash & NDIS_HASH_STD), 4250 "no standard hash types %08x", sc->sc_rss_hash); 4251 KASSERTMSG(sc->sc_rss_ind_size > 0, "no indirect table size"); 4252 4253 aprint_debug_dev(sc->sc_dev, "RSS indirect table size %d, hash %#x\n", 4254 sc->sc_rss_ind_size, sc->sc_rss_hash); 4255 4256 len = NDIS_RSSPRM_TOEPLITZ_SIZE(sc->sc_rss_ind_size); 4257 4258 memset(params, 0, sizeof(*params)); 4259 params->ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_PARAMS; 4260 params->ndis_hdr.ndis_rev = NDIS_RSS_PARAMS_REV_2; 4261 params->ndis_hdr.ndis_size = len; 4262 params->ndis_flags = flags; 4263 params->ndis_hash = 4264 sc->sc_rss_hash & (NDIS_HASH_FUNCTION_MASK | NDIS_HASH_STD); 4265 params->ndis_indsize = sizeof(rss->rss_ind[0]) * sc->sc_rss_ind_size; 4266 params->ndis_indoffset = 4267 offsetof(struct ndis_rssprm_toeplitz, rss_ind[0]); 4268 params->ndis_keysize = sizeof(rss->rss_key); 4269 params->ndis_keyoffset = 4270 offsetof(struct ndis_rssprm_toeplitz, rss_key[0]); 4271 4272 return hvn_rndis_set(sc, OID_GEN_RECEIVE_SCALE_PARAMETERS, rss, len); 4273} 4274 4275static void 4276hvn_fixup_rss_ind(struct hvn_softc *sc) 4277{ 4278 struct ndis_rssprm_toeplitz *rss = &sc->sc_rss; 4279 int i, nchan; 4280 4281 nchan = sc->sc_nrxr_inuse; 4282 KASSERTMSG(nchan > 1, "invalid # of channels %d", nchan); 4283 4284 /* 4285 * Check indirect table to make sure that all channels in it 4286 * can be used. 
4287 */ 4288 for (i = 0; i < NDIS_HASH_INDCNT; i++) { 4289 if (rss->rss_ind[i] >= nchan) { 4290 DPRINTF("%s: RSS indirect table %d fixup: %u -> %d\n", 4291 device_xname(sc->sc_dev), i, rss->rss_ind[i], 4292 nchan - 1); 4293 rss->rss_ind[i] = nchan - 1; 4294 } 4295 } 4296} 4297 4298static int 4299hvn_get_hwcaps(struct hvn_softc *sc, struct ndis_offload *caps) 4300{ 4301 struct ndis_offload in; 4302 size_t caps_len, len; 4303 int error; 4304 4305 memset(&in, 0, sizeof(in)); 4306 in.ndis_hdr.ndis_type = NDIS_OBJTYPE_OFFLOAD; 4307 if (sc->sc_ndisver >= NDIS_VERSION_6_30) { 4308 in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_3; 4309 len = in.ndis_hdr.ndis_size = NDIS_OFFLOAD_SIZE; 4310 } else if (sc->sc_ndisver >= NDIS_VERSION_6_1) { 4311 in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_2; 4312 len = in.ndis_hdr.ndis_size = NDIS_OFFLOAD_SIZE_6_1; 4313 } else { 4314 in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_1; 4315 len = in.ndis_hdr.ndis_size = NDIS_OFFLOAD_SIZE_6_0; 4316 } 4317 4318 caps_len = NDIS_OFFLOAD_SIZE; 4319 error = hvn_rndis_query2(sc, OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES, 4320 &in, len, caps, &caps_len, NDIS_OFFLOAD_SIZE_6_0); 4321 if (error) 4322 return error; 4323 4324 /* 4325 * Preliminary verification. 
4326 */ 4327 if (caps->ndis_hdr.ndis_type != NDIS_OBJTYPE_OFFLOAD) { 4328 DPRINTF("%s: invalid NDIS objtype 0x%02x\n", 4329 device_xname(sc->sc_dev), caps->ndis_hdr.ndis_type); 4330 return EINVAL; 4331 } 4332 if (caps->ndis_hdr.ndis_rev < NDIS_OFFLOAD_REV_1) { 4333 DPRINTF("%s: invalid NDIS objrev 0x%02x\n", 4334 device_xname(sc->sc_dev), caps->ndis_hdr.ndis_rev); 4335 return EINVAL; 4336 } 4337 if (caps->ndis_hdr.ndis_size > caps_len) { 4338 DPRINTF("%s: invalid NDIS objsize %u, data size %zu\n", 4339 device_xname(sc->sc_dev), caps->ndis_hdr.ndis_size, 4340 caps_len); 4341 return EINVAL; 4342 } else if (caps->ndis_hdr.ndis_size < NDIS_OFFLOAD_SIZE_6_0) { 4343 DPRINTF("%s: invalid NDIS objsize %u\n", 4344 device_xname(sc->sc_dev), caps->ndis_hdr.ndis_size); 4345 return EINVAL; 4346 } 4347 4348 /* 4349 * NOTE: 4350 * caps->ndis_hdr.ndis_size MUST be checked before accessing 4351 * NDIS 6.1+ specific fields. 4352 */ 4353 aprint_debug_dev(sc->sc_dev, "hwcaps rev %u\n", 4354 caps->ndis_hdr.ndis_rev); 4355 4356 aprint_debug_dev(sc->sc_dev, "hwcaps csum: " 4357 "ip4 tx 0x%x/0x%x rx 0x%x/0x%x, " 4358 "ip6 tx 0x%x/0x%x rx 0x%x/0x%x\n", 4359 caps->ndis_csum.ndis_ip4_txcsum, caps->ndis_csum.ndis_ip4_txenc, 4360 caps->ndis_csum.ndis_ip4_rxcsum, caps->ndis_csum.ndis_ip4_rxenc, 4361 caps->ndis_csum.ndis_ip6_txcsum, caps->ndis_csum.ndis_ip6_txenc, 4362 caps->ndis_csum.ndis_ip6_rxcsum, caps->ndis_csum.ndis_ip6_rxenc); 4363 aprint_debug_dev(sc->sc_dev, "hwcaps lsov2: " 4364 "ip4 maxsz %u minsg %u encap 0x%x, " 4365 "ip6 maxsz %u minsg %u encap 0x%x opts 0x%x\n", 4366 caps->ndis_lsov2.ndis_ip4_maxsz, caps->ndis_lsov2.ndis_ip4_minsg, 4367 caps->ndis_lsov2.ndis_ip4_encap, caps->ndis_lsov2.ndis_ip6_maxsz, 4368 caps->ndis_lsov2.ndis_ip6_minsg, caps->ndis_lsov2.ndis_ip6_encap, 4369 caps->ndis_lsov2.ndis_ip6_opts); 4370 4371 return 0; 4372} 4373 4374static int 4375hvn_set_capabilities(struct hvn_softc *sc, int mtu) 4376{ 4377 struct ndis_offload hwcaps; 4378 struct ndis_offload_params 
params; 4379 size_t len; 4380 uint32_t caps = 0; 4381 int error, tso_maxsz, tso_minsg; 4382 4383 error = hvn_get_hwcaps(sc, &hwcaps); 4384 if (error) { 4385 DPRINTF("%s: failed to query hwcaps\n", 4386 device_xname(sc->sc_dev)); 4387 return error; 4388 } 4389 4390 /* NOTE: 0 means "no change" */ 4391 memset(¶ms, 0, sizeof(params)); 4392 4393 params.ndis_hdr.ndis_type = NDIS_OBJTYPE_DEFAULT; 4394 if (sc->sc_ndisver < NDIS_VERSION_6_30) { 4395 params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_2; 4396 len = params.ndis_hdr.ndis_size = NDIS_OFFLOAD_PARAMS_SIZE_6_1; 4397 } else { 4398 params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_3; 4399 len = params.ndis_hdr.ndis_size = NDIS_OFFLOAD_PARAMS_SIZE; 4400 } 4401 4402 /* 4403 * TSO4/TSO6 setup. 4404 */ 4405 tso_maxsz = IP_MAXPACKET; 4406 tso_minsg = 2; 4407 if (hwcaps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023) { 4408 caps |= HVN_CAPS_TSO4; 4409 params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_ON; 4410 4411 if (hwcaps.ndis_lsov2.ndis_ip4_maxsz < tso_maxsz) 4412 tso_maxsz = hwcaps.ndis_lsov2.ndis_ip4_maxsz; 4413 if (hwcaps.ndis_lsov2.ndis_ip4_minsg > tso_minsg) 4414 tso_minsg = hwcaps.ndis_lsov2.ndis_ip4_minsg; 4415 } 4416 if ((hwcaps.ndis_lsov2.ndis_ip6_encap & NDIS_OFFLOAD_ENCAP_8023) && 4417 (hwcaps.ndis_lsov2.ndis_ip6_opts & HVN_NDIS_LSOV2_CAP_IP6) == 4418 HVN_NDIS_LSOV2_CAP_IP6) { 4419 caps |= HVN_CAPS_TSO6; 4420 params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_ON; 4421 4422 if (hwcaps.ndis_lsov2.ndis_ip6_maxsz < tso_maxsz) 4423 tso_maxsz = hwcaps.ndis_lsov2.ndis_ip6_maxsz; 4424 if (hwcaps.ndis_lsov2.ndis_ip6_minsg > tso_minsg) 4425 tso_minsg = hwcaps.ndis_lsov2.ndis_ip6_minsg; 4426 } 4427 sc->sc_tso_szmax = 0; 4428 sc->sc_tso_sgmin = 0; 4429 if (caps & (HVN_CAPS_TSO4 | HVN_CAPS_TSO6)) { 4430 KASSERTMSG(tso_maxsz <= IP_MAXPACKET, 4431 "invalid NDIS TSO maxsz %d", tso_maxsz); 4432 KASSERTMSG(tso_minsg >= 2, 4433 "invalid NDIS TSO minsg %d", tso_minsg); 4434 if (tso_maxsz < tso_minsg * mtu) { 4435 DPRINTF("%s: invalid 
NDIS TSO config: " 4436 "maxsz %d, minsg %d, mtu %d; " 4437 "disable TSO4 and TSO6\n", device_xname(sc->sc_dev), 4438 tso_maxsz, tso_minsg, mtu); 4439 caps &= ~(HVN_CAPS_TSO4 | HVN_CAPS_TSO6); 4440 params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_OFF; 4441 params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_OFF; 4442 } else { 4443 sc->sc_tso_szmax = tso_maxsz; 4444 sc->sc_tso_sgmin = tso_minsg; 4445 aprint_debug_dev(sc->sc_dev, 4446 "NDIS TSO szmax %d sgmin %d\n", 4447 sc->sc_tso_szmax, sc->sc_tso_sgmin); 4448 } 4449 } 4450 4451 /* IPv4 checksum */ 4452 if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HVN_NDIS_TXCSUM_CAP_IP4) == 4453 HVN_NDIS_TXCSUM_CAP_IP4) { 4454 caps |= HVN_CAPS_IPCS; 4455 params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TX; 4456 } 4457 if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4) { 4458 if (params.ndis_ip4csum == NDIS_OFFLOAD_PARAM_TX) 4459 params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TXRX; 4460 else 4461 params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_RX; 4462 } 4463 4464 /* TCP4 checksum */ 4465 if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HVN_NDIS_TXCSUM_CAP_TCP4) == 4466 HVN_NDIS_TXCSUM_CAP_TCP4) { 4467 caps |= HVN_CAPS_TCP4CS; 4468 params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TX; 4469 } 4470 if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) { 4471 if (params.ndis_tcp4csum == NDIS_OFFLOAD_PARAM_TX) 4472 params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TXRX; 4473 else 4474 params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_RX; 4475 } 4476 4477 /* UDP4 checksum */ 4478 if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) { 4479 caps |= HVN_CAPS_UDP4CS; 4480 params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TX; 4481 } 4482 if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) { 4483 if (params.ndis_udp4csum == NDIS_OFFLOAD_PARAM_TX) 4484 params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TXRX; 4485 else 4486 params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_RX; 4487 } 4488 4489 /* TCP6 checksum */ 4490 if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HVN_NDIS_TXCSUM_CAP_TCP6) == 4491 
HVN_NDIS_TXCSUM_CAP_TCP6) { 4492 caps |= HVN_CAPS_TCP6CS; 4493 params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TX; 4494 } 4495 if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6) { 4496 if (params.ndis_tcp6csum == NDIS_OFFLOAD_PARAM_TX) 4497 params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TXRX; 4498 else 4499 params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_RX; 4500 } 4501 4502 /* UDP6 checksum */ 4503 if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HVN_NDIS_TXCSUM_CAP_UDP6) == 4504 HVN_NDIS_TXCSUM_CAP_UDP6) { 4505 caps |= HVN_CAPS_UDP6CS; 4506 params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TX; 4507 } 4508 if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6) { 4509 if (params.ndis_udp6csum == NDIS_OFFLOAD_PARAM_TX) 4510 params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TXRX; 4511 else 4512 params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_RX; 4513 } 4514 4515 aprint_debug_dev(sc->sc_dev, "offload csum: " 4516 "ip4 %u, tcp4 %u, udp4 %u, tcp6 %u, udp6 %u\n", 4517 params.ndis_ip4csum, params.ndis_tcp4csum, params.ndis_udp4csum, 4518 params.ndis_tcp6csum, params.ndis_udp6csum); 4519 aprint_debug_dev(sc->sc_dev, "offload lsov2: ip4 %u, ip6 %u\n", 4520 params.ndis_lsov2_ip4, params.ndis_lsov2_ip6); 4521 4522 error = hvn_rndis_set(sc, OID_TCP_OFFLOAD_PARAMETERS, ¶ms, len); 4523 if (error) { 4524 DPRINTF("%s: offload config failed: %d\n", 4525 device_xname(sc->sc_dev), error); 4526 return error; 4527 } 4528 4529 aprint_debug_dev(sc->sc_dev, "offload config done\n"); 4530 sc->sc_caps |= caps; 4531 4532 return 0; 4533} 4534 4535static int 4536hvn_rndis_cmd(struct hvn_softc *sc, struct rndis_cmd *rc, u_int flags) 4537{ 4538 struct hvn_rx_ring *rxr = &sc->sc_rxr[0]; /* primary channel */ 4539 struct hvn_nvs_rndis *msg = &rc->rc_msg; 4540 struct rndis_msghdr *hdr = rc->rc_req; 4541 struct vmbus_gpa sgl[1]; 4542 int tries = 10; 4543 int rv, s; 4544 4545 msg->nvs_type = HVN_NVS_TYPE_RNDIS; 4546 msg->nvs_rndis_mtype = HVN_NVS_RNDIS_MTYPE_CTRL; 4547 msg->nvs_chim_idx = HVN_NVS_CHIM_IDX_INVALID; 4548 4549 
sgl[0].gpa_page = rc->rc_gpa; 4550 sgl[0].gpa_len = hdr->rm_len; 4551 sgl[0].gpa_ofs = 0; 4552 4553 rc->rc_done = 0; 4554 4555 mutex_enter(&rc->rc_lock); 4556 4557 hvn_submit_cmd(sc, rc); 4558 4559 do { 4560 rv = vmbus_channel_send_sgl(rxr->rxr_chan, sgl, 1, &rc->rc_msg, 4561 sizeof(*msg), rc->rc_id); 4562 if (rv == EAGAIN) { 4563 DELAY(1000); 4564 } else if (rv) { 4565 mutex_exit(&rc->rc_lock); 4566 DPRINTF("%s: RNDIS operation %u send error %d\n", 4567 device_xname(sc->sc_dev), hdr->rm_type, rv); 4568 hvn_rollback_cmd(sc, rc); 4569 return rv; 4570 } 4571 } while (rv != 0 && --tries > 0); 4572 4573 if (tries == 0 && rv != 0) { 4574 mutex_exit(&rc->rc_lock); 4575 device_printf(sc->sc_dev, 4576 "RNDIS operation %u send error %d\n", hdr->rm_type, rv); 4577 hvn_rollback_cmd(sc, rc); 4578 return rv; 4579 } 4580 if (vmbus_channel_is_revoked(rxr->rxr_chan) || 4581 ISSET(flags, HVN_RNDIS_CMD_NORESP)) { 4582 /* No response */ 4583 mutex_exit(&rc->rc_lock); 4584 if (hvn_rollback_cmd(sc, rc)) 4585 hvn_release_cmd(sc, rc); 4586 return 0; 4587 } 4588 4589 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE, 4590 BUS_DMASYNC_POSTWRITE); 4591 4592 while (!rc->rc_done && !ISSET(sc->sc_flags, HVN_SCF_REVOKED)) { 4593 mutex_exit(&rc->rc_lock); 4594 DELAY(1000); 4595 s = splnet(); 4596 hvn_nvs_intr1(rxr, 0, 0); 4597 splx(s); 4598 mutex_enter(&rc->rc_lock); 4599 } 4600 mutex_exit(&rc->rc_lock); 4601 4602 bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE, 4603 BUS_DMASYNC_POSTREAD); 4604 4605 if (!rc->rc_done) { 4606 rv = EINTR; 4607 if (hvn_rollback_cmd(sc, rc)) { 4608 hvn_release_cmd(sc, rc); 4609 rv = 0; 4610 } 4611 return rv; 4612 } 4613 4614 hvn_release_cmd(sc, rc); 4615 return 0; 4616} 4617 4618static int 4619hvn_rndis_input(struct hvn_rx_ring *rxr, uint64_t tid, void *arg) 4620{ 4621 struct hvn_softc *sc = rxr->rxr_softc; 4622 struct vmbus_chanpkt_prplist *cp = arg; 4623 uint32_t off, len, type; 4624 int i, rv, rx = 0; 4625 bool qfull = false; 4626 4627 if 
(sc->sc_rx_ring == NULL) { 4628 DPRINTF("%s: invalid rx ring\n", device_xname(sc->sc_dev)); 4629 return 0; 4630 } 4631 4632 for (i = 0; i < cp->cp_range_cnt; i++) { 4633 off = cp->cp_range[i].gpa_ofs; 4634 len = cp->cp_range[i].gpa_len; 4635 4636 KASSERT(off + len <= sc->sc_rx_size); 4637 KASSERT(len >= RNDIS_HEADER_OFFSET + 4); 4638 4639 memcpy(&type, sc->sc_rx_ring + off, sizeof(type)); 4640 switch (type) { 4641 /* data message */ 4642 case REMOTE_NDIS_PACKET_MSG: 4643 rv = hvn_rxeof(rxr, sc->sc_rx_ring + off, len); 4644 if (rv == 1) 4645 rx++; 4646 else if (rv == -1) /* The receive queue is full. */ 4647 qfull = true; 4648 break; 4649 /* completion messages */ 4650 case REMOTE_NDIS_INITIALIZE_CMPLT: 4651 case REMOTE_NDIS_QUERY_CMPLT: 4652 case REMOTE_NDIS_SET_CMPLT: 4653 case REMOTE_NDIS_RESET_CMPLT: 4654 case REMOTE_NDIS_KEEPALIVE_CMPLT: 4655 hvn_rndis_complete(sc, sc->sc_rx_ring + off, len); 4656 break; 4657 /* notification message */ 4658 case REMOTE_NDIS_INDICATE_STATUS_MSG: 4659 hvn_rndis_status(sc, sc->sc_rx_ring + off, len); 4660 break; 4661 default: 4662 device_printf(sc->sc_dev, 4663 "unhandled RNDIS message type %u\n", type); 4664 break; 4665 } 4666 } 4667 4668 hvn_nvs_ack(rxr, tid); 4669 4670 if (qfull) 4671 return -1; 4672 return rx; 4673} 4674 4675static inline struct mbuf * 4676hvn_devget(struct hvn_softc *sc, void *buf, uint32_t len) 4677{ 4678 struct ifnet *ifp = SC2IFP(sc); 4679 struct mbuf *m; 4680 size_t size = len + ETHER_ALIGN + ETHER_VLAN_ENCAP_LEN; 4681 4682 MGETHDR(m, M_NOWAIT, MT_DATA); 4683 if (m == NULL) 4684 return NULL; 4685 4686 if (size > MHLEN) { 4687 if (size <= MCLBYTES) 4688 MCLGET(m, M_NOWAIT); 4689 else 4690 MEXTMALLOC(m, size, M_NOWAIT); 4691 if ((m->m_flags & M_EXT) == 0) { 4692 m_freem(m); 4693 return NULL; 4694 } 4695 } 4696 4697 m->m_len = m->m_pkthdr.len = size; 4698 m_adj(m, ETHER_ALIGN + ETHER_VLAN_ENCAP_LEN); 4699 m_copyback(m, 0, len, buf); 4700 m_set_rcvif(m, ifp); 4701 return m; 4702} 4703 4704#define 
HVN_RXINFO_CSUM __BIT(NDIS_PKTINFO_TYPE_CSUM) 4705#define HVN_RXINFO_VLAN __BIT(NDIS_PKTINFO_TYPE_VLAN) 4706#define HVN_RXINFO_HASHVAL __BIT(HVN_NDIS_PKTINFO_TYPE_HASHVAL) 4707#define HVN_RXINFO_HASHINFO __BIT(HVN_NDIS_PKTINFO_TYPE_HASHINF) 4708#define HVN_RXINFO_ALL (HVN_RXINFO_CSUM | \ 4709 HVN_RXINFO_VLAN | \ 4710 HVN_RXINFO_HASHVAL | \ 4711 HVN_RXINFO_HASHINFO) 4712 4713static int 4714hvn_rxeof(struct hvn_rx_ring *rxr, uint8_t *buf, uint32_t len) 4715{ 4716 struct hvn_softc *sc = rxr->rxr_softc; 4717 struct ifnet *ifp = SC2IFP(sc); 4718 struct rndis_packet_msg *pkt; 4719 struct rndis_pktinfo *pi; 4720 struct mbuf *m; 4721 uint32_t mask, csum, vlan, hashval, hashinfo; 4722 4723 if (!(ifp->if_flags & IFF_RUNNING)) 4724 return 0; 4725 4726 if (len < sizeof(*pkt)) { 4727 device_printf(sc->sc_dev, "data packet too short: %u\n", 4728 len); 4729 return 0; 4730 } 4731 4732 pkt = (struct rndis_packet_msg *)buf; 4733 if (pkt->rm_dataoffset + pkt->rm_datalen > len) { 4734 device_printf(sc->sc_dev, 4735 "data packet out of bounds: %u@%u\n", pkt->rm_dataoffset, 4736 pkt->rm_datalen); 4737 return 0; 4738 } 4739 4740 if ((m = hvn_devget(sc, buf + RNDIS_HEADER_OFFSET + pkt->rm_dataoffset, 4741 pkt->rm_datalen)) == NULL) { 4742 if_statinc(ifp, if_ierrors); 4743 return 0; 4744 } 4745 4746 if (pkt->rm_pktinfooffset + pkt->rm_pktinfolen > len) { 4747 device_printf(sc->sc_dev, 4748 "pktinfo is out of bounds: %u@%u vs %u\n", 4749 pkt->rm_pktinfolen, pkt->rm_pktinfooffset, len); 4750 goto done; 4751 } 4752 4753 mask = csum = hashval = hashinfo = 0; 4754 vlan = 0xffffffff; 4755 pi = (struct rndis_pktinfo *)(buf + RNDIS_HEADER_OFFSET + 4756 pkt->rm_pktinfooffset); 4757 while (pkt->rm_pktinfolen > 0) { 4758 if (pi->rm_size > pkt->rm_pktinfolen) { 4759 device_printf(sc->sc_dev, 4760 "invalid pktinfo size: %u/%u\n", pi->rm_size, 4761 pkt->rm_pktinfolen); 4762 break; 4763 } 4764 4765 switch (pi->rm_type) { 4766 case NDIS_PKTINFO_TYPE_CSUM: 4767 memcpy(&csum, pi->rm_data, sizeof(csum)); 
4768 SET(mask, HVN_RXINFO_CSUM); 4769 break; 4770 case NDIS_PKTINFO_TYPE_VLAN: 4771 memcpy(&vlan, pi->rm_data, sizeof(vlan)); 4772 SET(mask, HVN_RXINFO_VLAN); 4773 break; 4774 case HVN_NDIS_PKTINFO_TYPE_HASHVAL: 4775 memcpy(&hashval, pi->rm_data, sizeof(hashval)); 4776 SET(mask, HVN_RXINFO_HASHVAL); 4777 break; 4778 case HVN_NDIS_PKTINFO_TYPE_HASHINF: 4779 memcpy(&hashinfo, pi->rm_data, sizeof(hashinfo)); 4780 SET(mask, HVN_RXINFO_HASHINFO); 4781 break; 4782 default: 4783 DPRINTF("%s: unhandled pktinfo type %u\n", 4784 device_xname(sc->sc_dev), pi->rm_type); 4785 goto next; 4786 } 4787 4788 if (mask == HVN_RXINFO_ALL) { 4789 /* All found; done */ 4790 break; 4791 } 4792 next: 4793 pkt->rm_pktinfolen -= pi->rm_size; 4794 pi = (struct rndis_pktinfo *)((char *)pi + pi->rm_size); 4795 } 4796 4797 /* 4798 * Final fixup. 4799 * - If there is no hash value, invalidate the hash info. 4800 */ 4801 if (!ISSET(mask, HVN_RXINFO_HASHVAL)) 4802 hashinfo = 0; 4803 4804 if (csum != 0) { 4805 if (ISSET(csum, NDIS_RXCSUM_INFO_IPCS_OK) && 4806 ISSET(ifp->if_csum_flags_rx, M_CSUM_IPv4)) { 4807 SET(m->m_pkthdr.csum_flags, M_CSUM_IPv4); 4808 rxr->rxr_evcsum_ip.ev_count++; 4809 } 4810 if (ISSET(csum, NDIS_RXCSUM_INFO_TCPCS_OK) && 4811 ISSET(ifp->if_csum_flags_rx, M_CSUM_TCPv4)) { 4812 SET(m->m_pkthdr.csum_flags, M_CSUM_TCPv4); 4813 rxr->rxr_evcsum_tcp.ev_count++; 4814 } 4815 if (ISSET(csum, NDIS_RXCSUM_INFO_UDPCS_OK) && 4816 ISSET(ifp->if_csum_flags_rx, M_CSUM_UDPv4)) { 4817 SET(m->m_pkthdr.csum_flags, M_CSUM_UDPv4); 4818 rxr->rxr_evcsum_udp.ev_count++; 4819 } 4820 } 4821 4822 if (vlan != 0xffffffff) { 4823 uint16_t t = NDIS_VLAN_INFO_ID(vlan); 4824 t |= NDIS_VLAN_INFO_PRI(vlan) << EVL_PRIO_BITS; 4825 t |= NDIS_VLAN_INFO_CFI(vlan) << EVL_CFI_BITS; 4826 4827 if (ISSET(sc->sc_ec.ec_capenable, ETHERCAP_VLAN_HWTAGGING)) { 4828 vlan_set_tag(m, t); 4829 rxr->rxr_evvlanhwtagging.ev_count++; 4830 } else { 4831 struct ether_header eh; 4832 struct ether_vlan_header *evl; 4833 4834 
KDASSERT(m->m_pkthdr.len >= sizeof(eh)); 4835 m_copydata(m, 0, sizeof(eh), &eh); 4836 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); 4837 KDASSERT(m != NULL); 4838 4839 evl = mtod(m, struct ether_vlan_header *); 4840 memcpy(evl->evl_dhost, eh.ether_dhost, 4841 ETHER_ADDR_LEN * 2); 4842 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 4843 evl->evl_tag = htons(t); 4844 evl->evl_proto = eh.ether_type; 4845 } 4846 } 4847 4848 /* XXX RSS hash is not supported. */ 4849 4850 done: 4851 rxr->rxr_evpkts.ev_count++; 4852 if_percpuq_enqueue(sc->sc_ipq, m); 4853 /* XXX Unable to detect that the receive queue is full. */ 4854 return 1; 4855} 4856 4857static void 4858hvn_rndis_complete(struct hvn_softc *sc, uint8_t *buf, uint32_t len) 4859{ 4860 struct rndis_cmd *rc; 4861 uint32_t id; 4862 4863 memcpy(&id, buf + RNDIS_HEADER_OFFSET, sizeof(id)); 4864 if ((rc = hvn_complete_cmd(sc, id)) != NULL) { 4865 mutex_enter(&rc->rc_lock); 4866 if (len < rc->rc_cmplen) 4867 device_printf(sc->sc_dev, 4868 "RNDIS response %u too short: %u\n", id, len); 4869 else 4870 memcpy(&rc->rc_cmp, buf, rc->rc_cmplen); 4871 if (len > rc->rc_cmplen && 4872 len - rc->rc_cmplen > HVN_RNDIS_BUFSIZE) 4873 device_printf(sc->sc_dev, 4874 "RNDIS response %u too large: %u\n", id, len); 4875 else if (len > rc->rc_cmplen) 4876 memcpy(&rc->rc_cmpbuf, buf + rc->rc_cmplen, 4877 len - rc->rc_cmplen); 4878 rc->rc_done = 1; 4879 cv_signal(&rc->rc_cv); 4880 mutex_exit(&rc->rc_lock); 4881 } else { 4882 DPRINTF("%s: failed to complete RNDIS request id %u\n", 4883 device_xname(sc->sc_dev), id); 4884 } 4885} 4886 4887static int 4888hvn_rndis_output_sgl(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd) 4889{ 4890 struct hvn_softc *sc = txr->txr_softc; 4891 uint64_t rid = (uint64_t)txd->txd_id << 32; 4892 int rv; 4893 4894 rv = vmbus_channel_send_sgl(txr->txr_chan, txd->txd_sgl, txd->txd_nsge, 4895 &sc->sc_data_msg, sizeof(sc->sc_data_msg), rid); 4896 if (rv) { 4897 DPRINTF("%s: RNDIS data send error %d\n", 4898 
device_xname(sc->sc_dev), rv); 4899 return rv; 4900 } 4901 return 0; 4902} 4903 4904static int 4905hvn_rndis_output_chim(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd) 4906{ 4907 struct hvn_nvs_rndis rndis; 4908 uint64_t rid = (uint64_t)txd->txd_id << 32; 4909 int rv; 4910 4911 memset(&rndis, 0, sizeof(rndis)); 4912 rndis.nvs_type = HVN_NVS_TYPE_RNDIS; 4913 rndis.nvs_rndis_mtype = HVN_NVS_RNDIS_MTYPE_DATA; 4914 rndis.nvs_chim_idx = txd->txd_chim_index; 4915 rndis.nvs_chim_sz = txd->txd_chim_size; 4916 4917 rv = vmbus_channel_send(txr->txr_chan, &rndis, sizeof(rndis), 4918 rid, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC); 4919 if (rv) { 4920 DPRINTF("%s: RNDIS chimney data send error %d: idx %u, sz %u\n", 4921 device_xname(sc->sc_dev), rv, rndis.nvs_chim_idx, 4922 rndis.nvs_chim_sz); 4923 return rv; 4924 } 4925 return 0; 4926} 4927 4928static void 4929hvn_rndis_status(struct hvn_softc *sc, uint8_t *buf, uint32_t len) 4930{ 4931 uint32_t status; 4932 4933 memcpy(&status, buf + RNDIS_HEADER_OFFSET, sizeof(status)); 4934 switch (status) { 4935 case RNDIS_STATUS_MEDIA_CONNECT: 4936 case RNDIS_STATUS_MEDIA_DISCONNECT: 4937 hvn_link_event(sc, HVN_LINK_EV_STATE_CHANGE); 4938 break; 4939 case RNDIS_STATUS_NETWORK_CHANGE: 4940 hvn_link_event(sc, HVN_LINK_EV_NETWORK_CHANGE); 4941 break; 4942 /* Ignore these */ 4943 case RNDIS_STATUS_OFFLOAD_CURRENT_CONFIG: 4944 case RNDIS_STATUS_LINK_SPEED_CHANGE: 4945 return; 4946 default: 4947 DPRINTF("%s: unhandled status %#x\n", device_xname(sc->sc_dev), 4948 status); 4949 return; 4950 } 4951} 4952 4953static int 4954hvn_rndis_query(struct hvn_softc *sc, uint32_t oid, void *res, size_t *length) 4955{ 4956 4957 return hvn_rndis_query2(sc, oid, NULL, 0, res, length, 0); 4958} 4959 4960static int 4961hvn_rndis_query2(struct hvn_softc *sc, uint32_t oid, const void *idata, 4962 size_t idlen, void *odata, size_t *odlen, size_t min_odlen) 4963{ 4964 struct rndis_cmd *rc; 4965 struct rndis_query_req *req; 4966 struct rndis_query_comp 
*cmp;
	size_t olength = *odlen;
	int rv;

	rc = hvn_alloc_cmd(sc);

	bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
	    BUS_DMASYNC_PREREAD);

	/* Allocate a fresh request id to match the completion against. */
	rc->rc_id = atomic_inc_uint_nv(&sc->sc_rndisrid);

	req = rc->rc_req;
	req->rm_type = REMOTE_NDIS_QUERY_MSG;
	req->rm_len = sizeof(*req) + idlen;
	req->rm_rid = rc->rc_id;
	req->rm_oid = oid;
	/* Input buffer, if any, immediately follows the request. */
	req->rm_infobufoffset = sizeof(*req) - RNDIS_HEADER_OFFSET;
	if (idlen > 0) {
		KASSERT(sizeof(*req) + idlen <= PAGE_SIZE);
		req->rm_infobuflen = idlen;
		memcpy(req + 1, idata, idlen);
	}

	rc->rc_cmplen = sizeof(*cmp);

	bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
	    BUS_DMASYNC_PREWRITE);

	if ((rv = hvn_rndis_cmd(sc, rc, 0)) != 0) {
		DPRINTF("%s: QUERY_MSG failed, error %d\n",
		    device_xname(sc->sc_dev), rv);
		hvn_free_cmd(sc, rc);
		return rv;
	}

	/* The result data was copied into rc_cmpbuf by the completion. */
	cmp = (struct rndis_query_comp *)&rc->rc_cmp;
	switch (cmp->rm_status) {
	case RNDIS_STATUS_SUCCESS:
		if (cmp->rm_infobuflen > olength ||
		    (min_odlen > 0 && cmp->rm_infobuflen < min_odlen)) {
			rv = EINVAL;
			break;
		}
		memcpy(odata, rc->rc_cmpbuf, cmp->rm_infobuflen);
		*odlen = cmp->rm_infobuflen;
		break;
	default:
		*odlen = 0;
		rv = EIO;
		break;
	}

	hvn_free_cmd(sc, rc);
	return rv;
}

/*
 * Issue an RNDIS SET_MSG for "oid" carrying "length" bytes of data.
 * Returns 0 on success, EIO if the device rejects the request, or a
 * command-submission error.
 */
static int
hvn_rndis_set(struct hvn_softc *sc, uint32_t oid, void *data, size_t length)
{
	struct rndis_cmd *rc;
	struct rndis_set_req *req;
	struct rndis_set_comp *cmp;
	int rv;

	rc = hvn_alloc_cmd(sc);

	bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
	    BUS_DMASYNC_PREREAD);

	rc->rc_id = atomic_inc_uint_nv(&sc->sc_rndisrid);

	req = rc->rc_req;
	req->rm_type = REMOTE_NDIS_SET_MSG;
	req->rm_len = sizeof(*req) + length;
	req->rm_rid = rc->rc_id;
	req->rm_oid = oid;
	/* Set data, if any, immediately follows the request. */
	req->rm_infobufoffset = sizeof(*req) - RNDIS_HEADER_OFFSET;

	rc->rc_cmplen = sizeof(*cmp);

	if (length > 0) {
		KASSERT(sizeof(*req) + length < PAGE_SIZE);
		req->rm_infobuflen = length;
		memcpy(req + 1, data, length);
	}

	bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
	    BUS_DMASYNC_PREWRITE);

	if ((rv = hvn_rndis_cmd(sc, rc, 0)) != 0) {
		DPRINTF("%s: SET_MSG failed, error %d\n",
		    device_xname(sc->sc_dev), rv);
		hvn_free_cmd(sc, rc);
		return rv;
	}

	cmp = (struct rndis_set_comp *)&rc->rc_cmp;
	if (cmp->rm_status != RNDIS_STATUS_SUCCESS)
		rv = EIO;

	hvn_free_cmd(sc, rc);
	return rv;
}

/*
 * Program the RNDIS packet filter from the interface flags
 * (promiscuous, broadcast, all-multicast) so the device starts
 * delivering traffic.  Any joined multicast group currently enables
 * all-multicast reception (no per-address filter support yet).
 */
static int
hvn_rndis_open(struct hvn_softc *sc)
{
	struct ifnet *ifp = SC2IFP(sc);
	uint32_t filter;
	int rv;

	if (ifp->if_flags & IFF_PROMISC) {
		filter = RNDIS_PACKET_TYPE_PROMISCUOUS;
	} else {
		filter = RNDIS_PACKET_TYPE_DIRECTED;
		if (ifp->if_flags & IFF_BROADCAST)
			filter |= RNDIS_PACKET_TYPE_BROADCAST;
		if (ifp->if_flags & IFF_ALLMULTI)
			filter |= RNDIS_PACKET_TYPE_ALL_MULTICAST;
		else {
			struct ethercom *ec = &sc->sc_ec;
			struct ether_multi *enm;
			struct ether_multistep step;

			ETHER_LOCK(ec);
			ETHER_FIRST_MULTI(step, ec, enm);
			/* TODO: support multicast list */
			if (enm != NULL)
				filter |= RNDIS_PACKET_TYPE_ALL_MULTICAST;
			ETHER_UNLOCK(ec);
		}
	}

	rv = hvn_rndis_set(sc, OID_GEN_CURRENT_PACKET_FILTER,
	    &filter, sizeof(filter));
	if (rv) {
		DPRINTF("%s: failed to set RNDIS filter to %#x\n",
		    device_xname(sc->sc_dev), filter);
	}
	return rv;
}

/*
 * Clear the RNDIS packet filter, stopping packet delivery.
 */
static int
hvn_rndis_close(struct hvn_softc *sc)
{
	uint32_t filter = 0;
	int rv;

	rv = hvn_rndis_set(sc, OID_GEN_CURRENT_PACKET_FILTER,
	    &filter, sizeof(filter));
	if (rv) {
		DPRINTF("%s: failed to clear RNDIS filter\n",
		    device_xname(sc->sc_dev));
	}
	return rv;
}

/*
 * Shut down the RNDIS endpoint with a REMOTE_NDIS_HALT_MSG.  The
 * command is submitted with HVN_RNDIS_CMD_NORESP since no RNDIS
 * completion is expected for a halt.
 */
static void
hvn_rndis_detach(struct hvn_softc *sc)
{
	struct rndis_cmd *rc;
	struct rndis_halt_req *req;
	int rv;

	rc = hvn_alloc_cmd(sc);

	bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
	    BUS_DMASYNC_PREREAD);

	rc->rc_id = atomic_inc_uint_nv(&sc->sc_rndisrid);

	req = rc->rc_req;
	req->rm_type = REMOTE_NDIS_HALT_MSG;
	req->rm_len = sizeof(*req);
	req->rm_rid = rc->rc_id;

	bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
	    BUS_DMASYNC_PREWRITE);

	/* No RNDIS completion; rely on NVS message send completion */
	if ((rv = hvn_rndis_cmd(sc, rc, HVN_RNDIS_CMD_NORESP)) != 0) {
		DPRINTF("%s: HALT_MSG failed, error %d\n",
		    device_xname(sc->sc_dev), rv);
	}
	hvn_free_cmd(sc, rc);
}

/*
 * Create the per-device sysctl subtree (hw.<devname>) with the
 * txrx_workqueue knob and the Rx/Tx packet-processing limits.  On
 * failure after the root node exists, the whole log is torn down.
 */
static void
hvn_init_sysctls(struct hvn_softc *sc)
{
	struct sysctllog **log;
	const struct sysctlnode *rnode, *cnode, *rxnode, *txnode;
	const char *dvname;
	int error;

	log = &sc->sc_sysctllog;
	dvname = device_xname(sc->sc_dev);

	error = sysctl_createv(log, 0, NULL, &rnode,
	    0, CTLTYPE_NODE, dvname,
	    SYSCTL_DESCR("hvn information and settings"),
	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
	if (error)
		goto err;

	error = sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
	    SYSCTL_DESCR("Use workqueue for packet processing"),
	    NULL, 0, &sc->sc_txrx_workqueue, 0, CTL_CREATE, CTL_EOL);
	if (error)
		goto out;

	error = sysctl_createv(log, 0, &rnode, &rxnode,
	    0, CTLTYPE_NODE, "rx",
	    SYSCTL_DESCR("hvn information and settings for Rx"),
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
	if (error)
		goto out;

	error = sysctl_createv(log, 0, &rxnode, NULL,
	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
	    SYSCTL_DESCR("max number of Rx packets"
		" to process for interrupt processing"),
	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
	if (error)
		goto out;

	error = sysctl_createv(log, 0, &rxnode, NULL,
	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
	    SYSCTL_DESCR("max number of Rx packets"
		" to process for deferred processing"),
	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
	if (error)
		goto out;

	error = sysctl_createv(log, 0, &rnode, &txnode,
	    0, CTLTYPE_NODE, "tx",
	    SYSCTL_DESCR("hvn information and settings for Tx"),
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
	if (error)
		goto out;

	error = sysctl_createv(log, 0, &txnode, NULL,
	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
	    SYSCTL_DESCR("max number of Tx packets"
		" to process for interrupt processing"),
	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
	if (error)
		goto out;

	error = sysctl_createv(log, 0, &txnode, NULL,
	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
	    SYSCTL_DESCR("max number of Tx packets"
		" to process for deferred processing"),
	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
	if (error)
		goto out;

	return;

out:
	sysctl_teardown(log);
	sc->sc_sysctllog = NULL;
err:
	aprint_error_dev(sc->sc_dev, "sysctl_createv failed (err = %d)\n",
	    error);
}

/*
 * Create the global hw.hvn sysctl tunables shared by all instances.
 */
SYSCTL_SETUP(sysctl_hw_hvn_setup, "sysctl hw.hvn setup")
{
	const struct sysctlnode *rnode;
	const struct sysctlnode *cnode;
	int error;

	error = sysctl_createv(clog, 0, NULL, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "hvn",
	    SYSCTL_DESCR("hvn global controls"),
	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
	if (error)
		goto fail;

	error = sysctl_createv(clog, 0, &rnode, &cnode,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
"udp_csum_fixup_mtu", 5250 SYSCTL_DESCR("UDP checksum offloding fixup MTU"), 5251 NULL, 0, &hvn_udpcs_fixup_mtu, sizeof(hvn_udpcs_fixup_mtu), 5252 CTL_CREATE, CTL_EOL); 5253 if (error) 5254 goto fail; 5255 5256 error = sysctl_createv(clog, 0, &rnode, &cnode, 5257 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, 5258 "chimney_size", 5259 SYSCTL_DESCR("Chimney send packet size limit"), 5260 NULL, 0, &hvn_tx_chimney_size, sizeof(hvn_tx_chimney_size), 5261 CTL_CREATE, CTL_EOL); 5262 if (error) 5263 goto fail; 5264 5265 error = sysctl_createv(clog, 0, &rnode, &cnode, 5266 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, 5267 "channel_count", 5268 SYSCTL_DESCR("# of channels to use"), 5269 NULL, 0, &hvn_channel_cnt, sizeof(hvn_channel_cnt), 5270 CTL_CREATE, CTL_EOL); 5271 if (error) 5272 goto fail; 5273 5274 error = sysctl_createv(clog, 0, &rnode, &cnode, 5275 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, 5276 "tx_ring_count", 5277 SYSCTL_DESCR("# of transmit rings to use"), 5278 NULL, 0, &hvn_tx_ring_cnt, sizeof(hvn_tx_ring_cnt), 5279 CTL_CREATE, CTL_EOL); 5280 if (error) 5281 goto fail; 5282 5283 return; 5284 5285fail: 5286 aprint_error("%s: sysctl_createv failed (err = %d)\n", __func__, error); 5287} 5288