1262152Sluigi/* 2262152Sluigi * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. 3262152Sluigi * 4262152Sluigi * Redistribution and use in source and binary forms, with or without 5262152Sluigi * modification, are permitted provided that the following conditions 6262152Sluigi * are met: 7262152Sluigi * 1. Redistributions of source code must retain the above copyright 8262152Sluigi * notice, this list of conditions and the following disclaimer. 9262152Sluigi * 2. Redistributions in binary form must reproduce the above copyright 10262152Sluigi * notice, this list of conditions and the following disclaimer in the 11262152Sluigi * documentation and/or other materials provided with the distribution. 12262152Sluigi * 13262152Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14262152Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15262152Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16262152Sluigi * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17262152Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18262152Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19262152Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20262152Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21262152Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22262152Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23262152Sluigi * SUCH DAMAGE. 24262152Sluigi */ 25262152Sluigi 26262152Sluigi/* 27262152Sluigi * This module implements netmap support on top of standard, 28262152Sluigi * unmodified device drivers. 29262152Sluigi * 30262152Sluigi * A NIOCREGIF request is handled here if the device does not 31262152Sluigi * have native support. 
TX and RX rings are emulated as follows: 32262152Sluigi * 33262152Sluigi * NIOCREGIF 34262152Sluigi * We preallocate a block of TX mbufs (roughly as many as 35262152Sluigi * tx descriptors; the number is not critical) to speed up 36262152Sluigi * operation during transmissions. The refcount on most of 37262152Sluigi * these buffers is artificially bumped up so we can recycle 38262152Sluigi * them more easily. Also, the destructor is intercepted 39262152Sluigi * so we use it as an interrupt notification to wake up 40262152Sluigi * processes blocked on a poll(). 41262152Sluigi * 42262152Sluigi * For each receive ring we allocate one "struct mbq" 43262152Sluigi * (an mbuf tailq plus a spinlock). We intercept packets 44262152Sluigi * (through if_input) 45262152Sluigi * on the receive path and put them in the mbq from which 46262152Sluigi * netmap receive routines can grab them. 47262152Sluigi * 48262152Sluigi * TX: 49262152Sluigi * in the generic_txsync() routine, netmap buffers are copied 50262152Sluigi * (or linked, in a future) to the preallocated mbufs 51262152Sluigi * and pushed to the transmit queue. Some of these mbufs 52262152Sluigi * (those with NS_REPORT, or otherwise every half ring) 53262152Sluigi * have the refcount=1, others have refcount=2. 54262152Sluigi * When the destructor is invoked, we take that as 55262152Sluigi * a notification that all mbufs up to that one in 56262152Sluigi * the specific ring have been completed, and generate 57262152Sluigi * the equivalent of a transmit interrupt. 
58262152Sluigi * 59262152Sluigi * RX: 60262152Sluigi * 61262152Sluigi */ 62262152Sluigi 63262152Sluigi#ifdef __FreeBSD__ 64262152Sluigi 65262152Sluigi#include <sys/cdefs.h> /* prerequisite */ 66262152Sluigi__FBSDID("$FreeBSD$"); 67262152Sluigi 68262152Sluigi#include <sys/types.h> 69262152Sluigi#include <sys/errno.h> 70262152Sluigi#include <sys/malloc.h> 71262152Sluigi#include <sys/lock.h> /* PROT_EXEC */ 72262152Sluigi#include <sys/rwlock.h> 73262152Sluigi#include <sys/socket.h> /* sockaddrs */ 74262152Sluigi#include <sys/selinfo.h> 75262152Sluigi#include <net/if.h> 76262152Sluigi#include <net/if_var.h> 77262152Sluigi#include <machine/bus.h> /* bus_dmamap_* in netmap_kern.h */ 78262152Sluigi 79262152Sluigi// XXX temporary - D() defined here 80262152Sluigi#include <net/netmap.h> 81262152Sluigi#include <dev/netmap/netmap_kern.h> 82262152Sluigi#include <dev/netmap/netmap_mem2.h> 83262152Sluigi 84267282Sluigi#define rtnl_lock() ND("rtnl_lock called") 85267282Sluigi#define rtnl_unlock() ND("rtnl_unlock called") 86262152Sluigi#define MBUF_TXQ(m) ((m)->m_pkthdr.flowid) 87262152Sluigi#define MBUF_RXQ(m) ((m)->m_pkthdr.flowid) 88262152Sluigi#define smp_mb() 89262152Sluigi 90262152Sluigi/* 91267282Sluigi * FreeBSD mbuf allocator/deallocator in emulation mode: 92267282Sluigi * 93267282Sluigi * We allocate EXT_PACKET mbuf+clusters, but need to set M_NOFREE 94267282Sluigi * so that the destructor, if invoked, will not free the packet. 95267282Sluigi * In principle we should set the destructor only on demand, 96267282Sluigi * but since there might be a race we better do it on allocation. 97267282Sluigi * As a consequence, we also need to set the destructor or we 98267282Sluigi * would leak buffers. 
99262152Sluigi */ 100262152Sluigi 101262152Sluigi/* 102267282Sluigi * mbuf wrappers 103262152Sluigi */ 104262152Sluigi 105270252Sluigi/* mbuf destructor, also need to change the type to EXT_EXTREF, 106262152Sluigi * add an M_NOFREE flag, and then clear the flag and 107262152Sluigi * chain into uma_zfree(zone_pack, mf) 108262152Sluigi * (or reinstall the buffer ?) 109262152Sluigi */ 110262152Sluigi#define SET_MBUF_DESTRUCTOR(m, fn) do { \ 111270252Sluigi (m)->m_ext.ext_free = (void *)fn; \ 112270252Sluigi (m)->m_ext.ext_type = EXT_EXTREF; \ 113270252Sluigi} while (0) 114262152Sluigi 115270252Sluigistatic void 116267333Sluiginetmap_default_mbuf_destructor(struct mbuf *m) 117270252Sluigi{ 118270252Sluigi /* restore original mbuf */ 119270252Sluigi m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg1; 120270252Sluigi m->m_ext.ext_arg1 = NULL; 121267282Sluigi m->m_ext.ext_type = EXT_PACKET; 122267282Sluigi m->m_ext.ext_free = NULL; 123270252Sluigi if (GET_MBUF_REFCNT(m) == 0) 124270252Sluigi SET_MBUF_REFCNT(m, 1); 125267282Sluigi uma_zfree(zone_pack, m); 126270252Sluigi} 127262152Sluigi 128270252Sluigistatic inline struct mbuf * 129270252Sluiginetmap_get_mbuf(int len) 130270252Sluigi{ 131267282Sluigi struct mbuf *m; 132297478Snp m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); 133267282Sluigi if (m) { 134297478Snp m->m_flags |= M_NOFREE; /* XXXNP: Almost certainly incorrect. 
*/ 135270252Sluigi m->m_ext.ext_arg1 = m->m_ext.ext_buf; // XXX save 136267282Sluigi m->m_ext.ext_free = (void *)netmap_default_mbuf_destructor; 137267282Sluigi m->m_ext.ext_type = EXT_EXTREF; 138270252Sluigi ND(5, "create m %p refcnt %d", m, GET_MBUF_REFCNT(m)); 139267282Sluigi } 140267282Sluigi return m; 141270252Sluigi} 142267282Sluigi 143262152Sluigi 144262152Sluigi 145262152Sluigi#else /* linux */ 146262152Sluigi 147262152Sluigi#include "bsd_glue.h" 148262152Sluigi 149262152Sluigi#include <linux/rtnetlink.h> /* rtnl_[un]lock() */ 150262152Sluigi#include <linux/ethtool.h> /* struct ethtool_ops, get_ringparam */ 151262152Sluigi#include <linux/hrtimer.h> 152262152Sluigi 153262152Sluigi//#define REG_RESET 154262152Sluigi 155262152Sluigi#endif /* linux */ 156262152Sluigi 157262152Sluigi 158262152Sluigi/* Common headers. */ 159262152Sluigi#include <net/netmap.h> 160262152Sluigi#include <dev/netmap/netmap_kern.h> 161262152Sluigi#include <dev/netmap/netmap_mem2.h> 162262152Sluigi 163262152Sluigi 164262152Sluigi 165262152Sluigi/* ======================== usage stats =========================== */ 166262152Sluigi 167270252Sluigi#ifdef RATE_GENERIC 168262152Sluigi#define IFRATE(x) x 169262152Sluigistruct rate_stats { 170262152Sluigi unsigned long txpkt; 171262152Sluigi unsigned long txsync; 172262152Sluigi unsigned long txirq; 173262152Sluigi unsigned long rxpkt; 174262152Sluigi unsigned long rxirq; 175262152Sluigi unsigned long rxsync; 176262152Sluigi}; 177262152Sluigi 178262152Sluigistruct rate_context { 179262152Sluigi unsigned refcount; 180262152Sluigi struct timer_list timer; 181262152Sluigi struct rate_stats new; 182262152Sluigi struct rate_stats old; 183262152Sluigi}; 184262152Sluigi 185262152Sluigi#define RATE_PRINTK(_NAME_) \ 186262152Sluigi printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD); 187262152Sluigi#define RATE_PERIOD 2 188262152Sluigistatic void rate_callback(unsigned long arg) 189262152Sluigi{ 190262152Sluigi struct 
rate_context * ctx = (struct rate_context *)arg; 191262152Sluigi struct rate_stats cur = ctx->new; 192262152Sluigi int r; 193262152Sluigi 194262152Sluigi RATE_PRINTK(txpkt); 195262152Sluigi RATE_PRINTK(txsync); 196262152Sluigi RATE_PRINTK(txirq); 197262152Sluigi RATE_PRINTK(rxpkt); 198262152Sluigi RATE_PRINTK(rxsync); 199262152Sluigi RATE_PRINTK(rxirq); 200262152Sluigi printk("\n"); 201262152Sluigi 202262152Sluigi ctx->old = cur; 203262152Sluigi r = mod_timer(&ctx->timer, jiffies + 204262152Sluigi msecs_to_jiffies(RATE_PERIOD * 1000)); 205262152Sluigi if (unlikely(r)) 206262152Sluigi D("[v1000] Error: mod_timer()"); 207262152Sluigi} 208262152Sluigi 209262152Sluigistatic struct rate_context rate_ctx; 210262152Sluigi 211270252Sluigivoid generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi) 212270252Sluigi{ 213270252Sluigi if (txp) rate_ctx.new.txpkt++; 214270252Sluigi if (txs) rate_ctx.new.txsync++; 215270252Sluigi if (txi) rate_ctx.new.txirq++; 216270252Sluigi if (rxp) rate_ctx.new.rxpkt++; 217270252Sluigi if (rxs) rate_ctx.new.rxsync++; 218270252Sluigi if (rxi) rate_ctx.new.rxirq++; 219270252Sluigi} 220270252Sluigi 221262152Sluigi#else /* !RATE */ 222262152Sluigi#define IFRATE(x) 223262152Sluigi#endif /* !RATE */ 224262152Sluigi 225262152Sluigi 226262152Sluigi/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */ 227262152Sluigi 228262152Sluigi/* 229262152Sluigi * Wrapper used by the generic adapter layer to notify 230262152Sluigi * the poller threads. Differently from netmap_rx_irq(), we check 231270252Sluigi * only NAF_NETMAP_ON instead of NAF_NATIVE_ON to enable the irq. 
232262152Sluigi */ 233262152Sluigistatic void 234262152Sluiginetmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done) 235262152Sluigi{ 236270252Sluigi struct netmap_adapter *na = NA(ifp); 237270252Sluigi if (unlikely(!nm_netmap_on(na))) 238262152Sluigi return; 239262152Sluigi 240262152Sluigi netmap_common_irq(ifp, q, work_done); 241262152Sluigi} 242262152Sluigi 243262152Sluigi 244262152Sluigi/* Enable/disable netmap mode for a generic network interface. */ 245262152Sluigistatic int 246262152Sluigigeneric_netmap_register(struct netmap_adapter *na, int enable) 247262152Sluigi{ 248262152Sluigi struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; 249262152Sluigi struct mbuf *m; 250262152Sluigi int error; 251262152Sluigi int i, r; 252262152Sluigi 253262152Sluigi if (!na) 254262152Sluigi return EINVAL; 255262152Sluigi 256262152Sluigi#ifdef REG_RESET 257262152Sluigi error = ifp->netdev_ops->ndo_stop(ifp); 258262152Sluigi if (error) { 259262152Sluigi return error; 260262152Sluigi } 261262152Sluigi#endif /* REG_RESET */ 262262152Sluigi 263262152Sluigi if (enable) { /* Enable netmap mode. */ 264267282Sluigi /* Init the mitigation support on all the rx queues. */ 265262152Sluigi gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit), 266262152Sluigi M_DEVBUF, M_NOWAIT | M_ZERO); 267262152Sluigi if (!gna->mit) { 268262152Sluigi D("mitigation allocation failed"); 269262152Sluigi error = ENOMEM; 270262152Sluigi goto out; 271262152Sluigi } 272262152Sluigi for (r=0; r<na->num_rx_rings; r++) 273270252Sluigi netmap_mitigation_init(&gna->mit[r], r, na); 274262152Sluigi 275262152Sluigi /* Initialize the rx queue, as generic_rx_handler() can 276262152Sluigi * be called as soon as netmap_catch_rx() returns. 
277262152Sluigi */ 278262152Sluigi for (r=0; r<na->num_rx_rings; r++) { 279262152Sluigi mbq_safe_init(&na->rx_rings[r].rx_queue); 280262152Sluigi } 281262152Sluigi 282262152Sluigi /* 283262152Sluigi * Preallocate packet buffers for the tx rings. 284262152Sluigi */ 285262152Sluigi for (r=0; r<na->num_tx_rings; r++) 286262152Sluigi na->tx_rings[r].tx_pool = NULL; 287262152Sluigi for (r=0; r<na->num_tx_rings; r++) { 288262152Sluigi na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *), 289262152Sluigi M_DEVBUF, M_NOWAIT | M_ZERO); 290262152Sluigi if (!na->tx_rings[r].tx_pool) { 291262152Sluigi D("tx_pool allocation failed"); 292262152Sluigi error = ENOMEM; 293262152Sluigi goto free_tx_pools; 294262152Sluigi } 295262152Sluigi for (i=0; i<na->num_tx_desc; i++) 296262152Sluigi na->tx_rings[r].tx_pool[i] = NULL; 297262152Sluigi for (i=0; i<na->num_tx_desc; i++) { 298270252Sluigi m = netmap_get_mbuf(NETMAP_BUF_SIZE(na)); 299262152Sluigi if (!m) { 300262152Sluigi D("tx_pool[%d] allocation failed", i); 301262152Sluigi error = ENOMEM; 302262152Sluigi goto free_tx_pools; 303262152Sluigi } 304262152Sluigi na->tx_rings[r].tx_pool[i] = m; 305262152Sluigi } 306262152Sluigi } 307262152Sluigi rtnl_lock(); 308262152Sluigi /* Prepare to intercept incoming traffic. */ 309262152Sluigi error = netmap_catch_rx(na, 1); 310262152Sluigi if (error) { 311262152Sluigi D("netdev_rx_handler_register() failed (%d)", error); 312262152Sluigi goto register_handler; 313262152Sluigi } 314270252Sluigi na->na_flags |= NAF_NETMAP_ON; 315262152Sluigi 316262152Sluigi /* Make netmap control the packet steering. 
*/ 317262152Sluigi netmap_catch_tx(gna, 1); 318262152Sluigi 319262152Sluigi rtnl_unlock(); 320262152Sluigi 321270252Sluigi#ifdef RATE_GENERIC 322262152Sluigi if (rate_ctx.refcount == 0) { 323262152Sluigi D("setup_timer()"); 324262152Sluigi memset(&rate_ctx, 0, sizeof(rate_ctx)); 325262152Sluigi setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx); 326262152Sluigi if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) { 327262152Sluigi D("Error: mod_timer()"); 328262152Sluigi } 329262152Sluigi } 330262152Sluigi rate_ctx.refcount++; 331262152Sluigi#endif /* RATE */ 332262152Sluigi 333262152Sluigi } else if (na->tx_rings[0].tx_pool) { 334262152Sluigi /* Disable netmap mode. We enter here only if the previous 335262152Sluigi generic_netmap_register(na, 1) was successfull. 336262152Sluigi If it was not, na->tx_rings[0].tx_pool was set to NULL by the 337262152Sluigi error handling code below. */ 338262152Sluigi rtnl_lock(); 339262152Sluigi 340270252Sluigi na->na_flags &= ~NAF_NETMAP_ON; 341262152Sluigi 342262152Sluigi /* Release packet steering control. */ 343262152Sluigi netmap_catch_tx(gna, 0); 344262152Sluigi 345262152Sluigi /* Do not intercept packets on the rx path. 
*/ 346262152Sluigi netmap_catch_rx(na, 0); 347262152Sluigi 348262152Sluigi rtnl_unlock(); 349262152Sluigi 350262152Sluigi /* Free the mbufs going to the netmap rings */ 351262152Sluigi for (r=0; r<na->num_rx_rings; r++) { 352262152Sluigi mbq_safe_purge(&na->rx_rings[r].rx_queue); 353262152Sluigi mbq_safe_destroy(&na->rx_rings[r].rx_queue); 354262152Sluigi } 355262152Sluigi 356262152Sluigi for (r=0; r<na->num_rx_rings; r++) 357262152Sluigi netmap_mitigation_cleanup(&gna->mit[r]); 358262152Sluigi free(gna->mit, M_DEVBUF); 359262152Sluigi 360262152Sluigi for (r=0; r<na->num_tx_rings; r++) { 361262152Sluigi for (i=0; i<na->num_tx_desc; i++) { 362262152Sluigi m_freem(na->tx_rings[r].tx_pool[i]); 363262152Sluigi } 364262152Sluigi free(na->tx_rings[r].tx_pool, M_DEVBUF); 365262152Sluigi } 366262152Sluigi 367270252Sluigi#ifdef RATE_GENERIC 368262152Sluigi if (--rate_ctx.refcount == 0) { 369262152Sluigi D("del_timer()"); 370262152Sluigi del_timer(&rate_ctx.timer); 371262152Sluigi } 372262152Sluigi#endif 373262152Sluigi } 374262152Sluigi 375262152Sluigi#ifdef REG_RESET 376262152Sluigi error = ifp->netdev_ops->ndo_open(ifp); 377262152Sluigi if (error) { 378262152Sluigi goto free_tx_pools; 379262152Sluigi } 380262152Sluigi#endif 381262152Sluigi 382262152Sluigi return 0; 383262152Sluigi 384262152Sluigiregister_handler: 385262152Sluigi rtnl_unlock(); 386262152Sluigifree_tx_pools: 387262152Sluigi for (r=0; r<na->num_tx_rings; r++) { 388262152Sluigi if (na->tx_rings[r].tx_pool == NULL) 389262152Sluigi continue; 390262152Sluigi for (i=0; i<na->num_tx_desc; i++) 391262152Sluigi if (na->tx_rings[r].tx_pool[i]) 392262152Sluigi m_freem(na->tx_rings[r].tx_pool[i]); 393262152Sluigi free(na->tx_rings[r].tx_pool, M_DEVBUF); 394262152Sluigi na->tx_rings[r].tx_pool = NULL; 395262152Sluigi } 396262152Sluigi for (r=0; r<na->num_rx_rings; r++) { 397262152Sluigi netmap_mitigation_cleanup(&gna->mit[r]); 398262152Sluigi mbq_safe_destroy(&na->rx_rings[r].rx_queue); 399262152Sluigi } 400262152Sluigi 
free(gna->mit, M_DEVBUF); 401262152Sluigiout: 402262152Sluigi 403262152Sluigi return error; 404262152Sluigi} 405262152Sluigi 406262152Sluigi/* 407262152Sluigi * Callback invoked when the device driver frees an mbuf used 408262152Sluigi * by netmap to transmit a packet. This usually happens when 409262152Sluigi * the NIC notifies the driver that transmission is completed. 410262152Sluigi */ 411262152Sluigistatic void 412262152Sluigigeneric_mbuf_destructor(struct mbuf *m) 413262152Sluigi{ 414262152Sluigi netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL); 415262152Sluigi#ifdef __FreeBSD__ 416267282Sluigi if (netmap_verbose) 417267282Sluigi RD(5, "Tx irq (%p) queue %d index %d" , m, MBUF_TXQ(m), (int)(uintptr_t)m->m_ext.ext_arg1); 418267282Sluigi netmap_default_mbuf_destructor(m); 419262152Sluigi#endif /* __FreeBSD__ */ 420262152Sluigi IFRATE(rate_ctx.new.txirq++); 421262152Sluigi} 422262152Sluigi 423270252Sluigiextern int netmap_adaptive_io; 424270252Sluigi 425262152Sluigi/* Record completed transmissions and update hwtail. 426262152Sluigi * 427262152Sluigi * The oldest tx buffer not yet completed is at nr_hwtail + 1, 428262152Sluigi * nr_hwcur is the first unsent buffer. 
429262152Sluigi */ 430262152Sluigistatic u_int 431262152Sluigigeneric_netmap_tx_clean(struct netmap_kring *kring) 432262152Sluigi{ 433262152Sluigi u_int const lim = kring->nkr_num_slots - 1; 434262152Sluigi u_int nm_i = nm_next(kring->nr_hwtail, lim); 435262152Sluigi u_int hwcur = kring->nr_hwcur; 436262152Sluigi u_int n = 0; 437262152Sluigi struct mbuf **tx_pool = kring->tx_pool; 438262152Sluigi 439262152Sluigi while (nm_i != hwcur) { /* buffers not completed */ 440262152Sluigi struct mbuf *m = tx_pool[nm_i]; 441262152Sluigi 442262152Sluigi if (unlikely(m == NULL)) { 443262152Sluigi /* this is done, try to replenish the entry */ 444270252Sluigi tx_pool[nm_i] = m = netmap_get_mbuf(NETMAP_BUF_SIZE(kring->na)); 445262152Sluigi if (unlikely(m == NULL)) { 446262152Sluigi D("mbuf allocation failed, XXX error"); 447262152Sluigi // XXX how do we proceed ? break ? 448262152Sluigi return -ENOMEM; 449262152Sluigi } 450262152Sluigi } else if (GET_MBUF_REFCNT(m) != 1) { 451262152Sluigi break; /* This mbuf is still busy: its refcnt is 2. */ 452262152Sluigi } 453262152Sluigi n++; 454262152Sluigi nm_i = nm_next(nm_i, lim); 455270252Sluigi#if 0 /* rate adaptation */ 456270252Sluigi if (netmap_adaptive_io > 1) { 457270252Sluigi if (n >= netmap_adaptive_io) 458270252Sluigi break; 459270252Sluigi } else if (netmap_adaptive_io) { 460270252Sluigi /* if hwcur - nm_i < lim/8 do an early break 461270252Sluigi * so we prevent the sender from stalling. See CVT. 
462270252Sluigi */ 463270252Sluigi if (hwcur >= nm_i) { 464270252Sluigi if (hwcur - nm_i < lim/2) 465270252Sluigi break; 466270252Sluigi } else { 467270252Sluigi if (hwcur + lim + 1 - nm_i < lim/2) 468270252Sluigi break; 469270252Sluigi } 470270252Sluigi } 471270252Sluigi#endif 472262152Sluigi } 473262152Sluigi kring->nr_hwtail = nm_prev(nm_i, lim); 474262152Sluigi ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail); 475262152Sluigi 476262152Sluigi return n; 477262152Sluigi} 478262152Sluigi 479262152Sluigi 480262152Sluigi/* 481262152Sluigi * We have pending packets in the driver between nr_hwtail +1 and hwcur. 482262152Sluigi * Compute a position in the middle, to be used to generate 483262152Sluigi * a notification. 484262152Sluigi */ 485262152Sluigistatic inline u_int 486262152Sluigigeneric_tx_event_middle(struct netmap_kring *kring, u_int hwcur) 487262152Sluigi{ 488262152Sluigi u_int n = kring->nkr_num_slots; 489262152Sluigi u_int ntc = nm_next(kring->nr_hwtail, n-1); 490262152Sluigi u_int e; 491262152Sluigi 492262152Sluigi if (hwcur >= ntc) { 493262152Sluigi e = (hwcur + ntc) / 2; 494262152Sluigi } else { /* wrap around */ 495262152Sluigi e = (hwcur + n + ntc) / 2; 496262152Sluigi if (e >= n) { 497262152Sluigi e -= n; 498262152Sluigi } 499262152Sluigi } 500262152Sluigi 501262152Sluigi if (unlikely(e >= n)) { 502262152Sluigi D("This cannot happen"); 503262152Sluigi e = 0; 504262152Sluigi } 505262152Sluigi 506262152Sluigi return e; 507262152Sluigi} 508262152Sluigi 509262152Sluigi/* 510262152Sluigi * We have pending packets in the driver between nr_hwtail+1 and hwcur. 511262152Sluigi * Schedule a notification approximately in the middle of the two. 512262152Sluigi * There is a race but this is only called within txsync which does 513262152Sluigi * a double check. 
514262152Sluigi */ 515262152Sluigistatic void 516262152Sluigigeneric_set_tx_event(struct netmap_kring *kring, u_int hwcur) 517262152Sluigi{ 518262152Sluigi struct mbuf *m; 519262152Sluigi u_int e; 520262152Sluigi 521262152Sluigi if (nm_next(kring->nr_hwtail, kring->nkr_num_slots -1) == hwcur) { 522262152Sluigi return; /* all buffers are free */ 523262152Sluigi } 524262152Sluigi e = generic_tx_event_middle(kring, hwcur); 525262152Sluigi 526262152Sluigi m = kring->tx_pool[e]; 527267282Sluigi ND(5, "Request Event at %d mbuf %p refcnt %d", e, m, m ? GET_MBUF_REFCNT(m) : -2 ); 528262152Sluigi if (m == NULL) { 529262152Sluigi /* This can happen if there is already an event on the netmap 530262152Sluigi slot 'e': There is nothing to do. */ 531262152Sluigi return; 532262152Sluigi } 533262152Sluigi kring->tx_pool[e] = NULL; 534262152Sluigi SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor); 535262152Sluigi 536262152Sluigi // XXX wmb() ? 537262152Sluigi /* Decrement the refcount an free it if we have the last one. */ 538262152Sluigi m_freem(m); 539262152Sluigi smp_mb(); 540262152Sluigi} 541262152Sluigi 542262152Sluigi 543262152Sluigi/* 544262152Sluigi * generic_netmap_txsync() transforms netmap buffers into mbufs 545262152Sluigi * and passes them to the standard device driver 546262152Sluigi * (ndo_start_xmit() or ifp->if_transmit() ). 547262152Sluigi * On linux this is not done directly, but using dev_queue_xmit(), 548262152Sluigi * since it implements the TX flow control (and takes some locks). 
549262152Sluigi */ 550262152Sluigistatic int 551270252Sluigigeneric_netmap_txsync(struct netmap_kring *kring, int flags) 552262152Sluigi{ 553270252Sluigi struct netmap_adapter *na = kring->na; 554262152Sluigi struct ifnet *ifp = na->ifp; 555262152Sluigi struct netmap_ring *ring = kring->ring; 556262152Sluigi u_int nm_i; /* index into the netmap ring */ // j 557262152Sluigi u_int const lim = kring->nkr_num_slots - 1; 558262152Sluigi u_int const head = kring->rhead; 559270252Sluigi u_int ring_nr = kring->ring_id; 560262152Sluigi 561262152Sluigi IFRATE(rate_ctx.new.txsync++); 562262152Sluigi 563262152Sluigi // TODO: handle the case of mbuf allocation failure 564262152Sluigi 565262152Sluigi rmb(); 566262152Sluigi 567262152Sluigi /* 568262152Sluigi * First part: process new packets to send. 569262152Sluigi */ 570262152Sluigi nm_i = kring->nr_hwcur; 571262152Sluigi if (nm_i != head) { /* we have new packets to send */ 572262152Sluigi while (nm_i != head) { 573262152Sluigi struct netmap_slot *slot = &ring->slot[nm_i]; 574262152Sluigi u_int len = slot->len; 575270252Sluigi void *addr = NMB(na, slot); 576262152Sluigi 577262152Sluigi /* device-specific */ 578262152Sluigi struct mbuf *m; 579262152Sluigi int tx_ret; 580262152Sluigi 581270252Sluigi NM_CHECK_ADDR_LEN(na, addr, len); 582262152Sluigi 583262152Sluigi /* Tale a mbuf from the tx pool and copy in the user packet. */ 584262152Sluigi m = kring->tx_pool[nm_i]; 585262152Sluigi if (unlikely(!m)) { 586262152Sluigi RD(5, "This should never happen"); 587270252Sluigi kring->tx_pool[nm_i] = m = netmap_get_mbuf(NETMAP_BUF_SIZE(na)); 588262152Sluigi if (unlikely(m == NULL)) { 589262152Sluigi D("mbuf allocation failed"); 590262152Sluigi break; 591262152Sluigi } 592262152Sluigi } 593262152Sluigi /* XXX we should ask notifications when NS_REPORT is set, 594262152Sluigi * or roughly every half frame. We can optimize this 595262152Sluigi * by lazily requesting notifications only when a 596262152Sluigi * transmission fails. 
Probably the best way is to 597262152Sluigi * break on failures and set notifications when 598262152Sluigi * ring->cur == ring->tail || nm_i != cur 599262152Sluigi */ 600262152Sluigi tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr); 601262152Sluigi if (unlikely(tx_ret)) { 602270252Sluigi ND(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]", 603262152Sluigi tx_ret, nm_i, head, kring->nr_hwtail); 604262152Sluigi /* 605262152Sluigi * No room for this mbuf in the device driver. 606262152Sluigi * Request a notification FOR A PREVIOUS MBUF, 607262152Sluigi * then call generic_netmap_tx_clean(kring) to do the 608262152Sluigi * double check and see if we can free more buffers. 609262152Sluigi * If there is space continue, else break; 610262152Sluigi * NOTE: the double check is necessary if the problem 611262152Sluigi * occurs in the txsync call after selrecord(). 612262152Sluigi * Also, we need some way to tell the caller that not 613262152Sluigi * all buffers were queued onto the device (this was 614262152Sluigi * not a problem with native netmap driver where space 615262152Sluigi * is preallocated). The bridge has a similar problem 616262152Sluigi * and we solve it there by dropping the excess packets. 617262152Sluigi */ 618262152Sluigi generic_set_tx_event(kring, nm_i); 619262152Sluigi if (generic_netmap_tx_clean(kring)) { /* space now available */ 620262152Sluigi continue; 621262152Sluigi } else { 622262152Sluigi break; 623262152Sluigi } 624262152Sluigi } 625262152Sluigi slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); 626262152Sluigi nm_i = nm_next(nm_i, lim); 627262152Sluigi IFRATE(rate_ctx.new.txpkt ++); 628262152Sluigi } 629262152Sluigi 630262152Sluigi /* Update hwcur to the next slot to transmit. 
*/ 631262152Sluigi kring->nr_hwcur = nm_i; /* not head, we could break early */ 632262152Sluigi } 633262152Sluigi 634262152Sluigi /* 635262152Sluigi * Second, reclaim completed buffers 636262152Sluigi */ 637262152Sluigi if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { 638262152Sluigi /* No more available slots? Set a notification event 639262152Sluigi * on a netmap slot that will be cleaned in the future. 640262152Sluigi * No doublecheck is performed, since txsync() will be 641262152Sluigi * called twice by netmap_poll(). 642262152Sluigi */ 643262152Sluigi generic_set_tx_event(kring, nm_i); 644262152Sluigi } 645262152Sluigi ND("tx #%d, hwtail = %d", n, kring->nr_hwtail); 646262152Sluigi 647262152Sluigi generic_netmap_tx_clean(kring); 648262152Sluigi 649262152Sluigi nm_txsync_finalize(kring); 650262152Sluigi 651262152Sluigi return 0; 652262152Sluigi} 653262152Sluigi 654262152Sluigi 655262152Sluigi/* 656262152Sluigi * This handler is registered (through netmap_catch_rx()) 657262152Sluigi * within the attached network interface 658262152Sluigi * in the RX subsystem, so that every mbuf passed up by 659262152Sluigi * the driver can be stolen to the network stack. 660262152Sluigi * Stolen packets are put in a queue where the 661262152Sluigi * generic_netmap_rxsync() callback can extract them. 662262152Sluigi */ 663262152Sluigivoid 664262152Sluigigeneric_rx_handler(struct ifnet *ifp, struct mbuf *m) 665262152Sluigi{ 666262152Sluigi struct netmap_adapter *na = NA(ifp); 667262152Sluigi struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; 668262152Sluigi u_int work_done; 669262152Sluigi u_int rr = MBUF_RXQ(m); // receive ring number 670262152Sluigi 671262152Sluigi if (rr >= na->num_rx_rings) { 672262152Sluigi rr = rr % na->num_rx_rings; // XXX expensive... 
673262152Sluigi } 674262152Sluigi 675262152Sluigi /* limit the size of the queue */ 676262152Sluigi if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) { 677262152Sluigi m_freem(m); 678262152Sluigi } else { 679262152Sluigi mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m); 680262152Sluigi } 681262152Sluigi 682262152Sluigi if (netmap_generic_mit < 32768) { 683262152Sluigi /* no rx mitigation, pass notification up */ 684262152Sluigi netmap_generic_irq(na->ifp, rr, &work_done); 685262152Sluigi IFRATE(rate_ctx.new.rxirq++); 686262152Sluigi } else { 687262152Sluigi /* same as send combining, filter notification if there is a 688262152Sluigi * pending timer, otherwise pass it up and start a timer. 689262152Sluigi */ 690262152Sluigi if (likely(netmap_mitigation_active(&gna->mit[rr]))) { 691262152Sluigi /* Record that there is some pending work. */ 692262152Sluigi gna->mit[rr].mit_pending = 1; 693262152Sluigi } else { 694262152Sluigi netmap_generic_irq(na->ifp, rr, &work_done); 695262152Sluigi IFRATE(rate_ctx.new.rxirq++); 696262152Sluigi netmap_mitigation_start(&gna->mit[rr]); 697262152Sluigi } 698262152Sluigi } 699262152Sluigi} 700262152Sluigi 701262152Sluigi/* 702262152Sluigi * generic_netmap_rxsync() extracts mbufs from the queue filled by 703262152Sluigi * generic_netmap_rx_handler() and puts their content in the netmap 704262152Sluigi * receive ring. 
 * Access must be protected because the rx handler is asynchronous,
 */
/*
 * nm_rxsync callback for the emulated adapter.
 *
 * First part: move packets that the rx interception hook queued on
 * kring->rx_queue into the netmap ring, copying each mbuf's payload
 * into the slot buffer.  Second part: walk past the slots that
 * userspace has released.
 *
 * Returns 0 on success, or the result of netmap_ring_reinit() if the
 * ring state is found to be inconsistent.
 */
static int
generic_netmap_rxsync(struct netmap_kring *kring, int flags)
{
	struct netmap_ring *ring = kring->ring;
	struct netmap_adapter *na = kring->na;
	u_int nm_i;	/* index into the netmap ring */
	u_int n;	/* number of slots processed in the current loop */
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = nm_rxsync_prologue(kring);
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

	if (head > lim)
		return netmap_ring_reinit(kring);	/* inconsistent ring, resync */

	/*
	 * First part: import newly received packets.
	 */
	if (netmap_no_pendintr || force_update) {
		/* extract buffers from the rx queue, stop at most one
		 * slot before nr_hwcur (stop_i)
		 */
		uint16_t slot_flags = kring->nkr_slot_flags;
		u_int stop_i = nm_prev(kring->nr_hwcur, lim);

		nm_i = kring->nr_hwtail; /* first empty slot in the receive ring */
		for (n = 0; nm_i != stop_i; n++) {
			int len;
			void *addr = NMB(na, &ring->slot[nm_i]);
			struct mbuf *m;

			/* we only check the address here on generic rx rings */
			if (addr == NETMAP_BUF_BASE(na)) { /* Bad buffer */
				return netmap_ring_reinit(kring);
			}
			/*
			 * Call the locked version of the function.
			 * XXX Ideally we could grab a batch of mbufs at once
			 * and save some locking overhead.
			 */
			m = mbq_safe_dequeue(&kring->rx_queue);
			if (!m)	/* no more data */
				break;
			/* NOTE(review): the copy uses MBUF_LEN(m) without
			 * clamping it to the netmap buffer size -- presumably
			 * the interception path never queues oversized mbufs;
			 * verify against the rx hook. */
			len = MBUF_LEN(m);
			m_copydata(m, 0, len, addr);
			ring->slot[nm_i].len = len;
			ring->slot[nm_i].flags = slot_flags;
			m_freem(m);
			nm_i = nm_next(nm_i, lim);
		}
		if (n) {
			/* publish the new slots to userspace only after
			 * their contents have been fully written */
			kring->nr_hwtail = nm_i;
			IFRATE(rate_ctx.new.rxpkt += n);
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	// XXX should we invert the order ?
	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		/* Userspace has released some packets. */
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];

			slot->flags &= ~NS_BUF_CHANGED;
			nm_i = nm_next(nm_i, lim);
		}
		kring->nr_hwcur = head;
	}
	/* tell userspace that there might be new packets. */
	nm_rxsync_finalize(kring);
	IFRATE(rate_ctx.new.rxsync++);

	return 0;
}

/*
 * nm_dtor callback for the emulated adapter: detach the generic
 * adapter from the ifnet and restore the adapter (if any) that was
 * in place before generic support was activated.
 */
static void
generic_netmap_dtor(struct netmap_adapter *na)
{
	struct ifnet *ifp = na->ifp;
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
	struct netmap_adapter *prev_na = gna->prev;

	if (prev_na != NULL) {
		D("Released generic NA %p", gna);
		/* presumably pairs with a reference taken when generic
		 * support was attached -- verify against the attach path */
		if_rele(na->ifp);
		netmap_adapter_put(prev_na);
	}
	if (ifp != NULL) {
		WNA(ifp) = prev_na;	/* re-link the previous adapter to the ifnet */
		D("Restored native NA %p", prev_na);
		na->ifp = NULL;
	}
}

/*
 * generic_netmap_attach() makes it possible to use netmap on
 * a device without native netmap support.
 * This is less performant than native support but potentially
 * faster than raw sockets or similar schemes.
 *
 * In this "emulated" mode, netmap rings do not necessarily
 * have the same size as those in the NIC. We use a default
 * value and possibly override it if the OS has ways to fetch the
 * actual configuration.
814262152Sluigi */ 815262152Sluigiint 816262152Sluigigeneric_netmap_attach(struct ifnet *ifp) 817262152Sluigi{ 818262152Sluigi struct netmap_adapter *na; 819262152Sluigi struct netmap_generic_adapter *gna; 820262152Sluigi int retval; 821262152Sluigi u_int num_tx_desc, num_rx_desc; 822262152Sluigi 823262152Sluigi num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */ 824262152Sluigi 825278779Sluigi generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); /* ignore errors */ 826262152Sluigi ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc); 827267282Sluigi if (num_tx_desc == 0 || num_rx_desc == 0) { 828267282Sluigi D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc); 829267282Sluigi return EINVAL; 830267282Sluigi } 831262152Sluigi 832262152Sluigi gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO); 833262152Sluigi if (gna == NULL) { 834262152Sluigi D("no memory on attach, give up"); 835262152Sluigi return ENOMEM; 836262152Sluigi } 837262152Sluigi na = (struct netmap_adapter *)gna; 838262152Sluigi na->ifp = ifp; 839262152Sluigi na->num_tx_desc = num_tx_desc; 840262152Sluigi na->num_rx_desc = num_rx_desc; 841262152Sluigi na->nm_register = &generic_netmap_register; 842262152Sluigi na->nm_txsync = &generic_netmap_txsync; 843262152Sluigi na->nm_rxsync = &generic_netmap_rxsync; 844262152Sluigi na->nm_dtor = &generic_netmap_dtor; 845270252Sluigi /* when using generic, NAF_NETMAP_ON is set so we force 846262152Sluigi * NAF_SKIP_INTR to use the regular interrupt handler 847262152Sluigi */ 848262152Sluigi na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS; 849262152Sluigi 850262152Sluigi ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)", 851262152Sluigi ifp->num_tx_queues, ifp->real_num_tx_queues, 852262152Sluigi ifp->tx_queue_len); 853262152Sluigi ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)", 854262152Sluigi ifp->num_rx_queues, ifp->real_num_rx_queues); 855262152Sluigi 856262152Sluigi 
generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings); 857262152Sluigi 858262152Sluigi retval = netmap_attach_common(na); 859262152Sluigi if (retval) { 860262152Sluigi free(gna, M_DEVBUF); 861262152Sluigi } 862262152Sluigi 863262152Sluigi return retval; 864262152Sluigi} 865