/*
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This module implements netmap support on top of standard,
 * unmodified device drivers.
 *
 * A NIOCREGIF request is handled here if the device does not
 * have native support. TX and RX rings are emulated as follows:
 *
 * NIOCREGIF
 *	We preallocate a block of TX mbufs (roughly as many as
 *	tx descriptors; the number is not critical) to speed up
 *	operation during transmissions. The refcount on most of
 *	these buffers is artificially bumped up so we can recycle
 *	them more easily. Also, the destructor is intercepted
 *	so we use it as an interrupt notification to wake up
 *	processes blocked on a poll().
 *
 *	For each receive ring we allocate one "struct mbq"
 *	(an mbuf tailq plus a spinlock). We intercept packets
 *	(through if_input)
 *	on the receive path and put them in the mbq from which
 *	netmap receive routines can grab them.
 *
 * TX:
 *	in the generic_txsync() routine, netmap buffers are copied
 *	(or linked, in the future) to the preallocated mbufs
 *	and pushed to the transmit queue. Some of these mbufs
 *	(those with NS_REPORT, or otherwise every half ring)
 *	have the refcount=1, others have refcount=2.
 *	When the destructor is invoked, we take that as
 *	a notification that all mbufs up to that one in
 *	the specific ring have been completed, and generate
 *	the equivalent of a transmit interrupt.
 *
 * RX:
 *
 */
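
/*
 * Userspace view (an illustrative sketch, not compiled here): an
 * application does not need to know whether a NIC runs in native or
 * emulated mode, the standard netmap API works unchanged. A minimal
 * TX loop, assuming an interface named "em0" and the netmap_user.h
 * helpers:
 *
 *	#include <net/netmap_user.h>
 *
 *	struct nm_desc *d = nm_open("netmap:em0", NULL, 0, NULL);
 *	struct netmap_ring *ring = NETMAP_TXRING(d->nifp, 0);
 *	while (ring->cur != ring->tail) {
 *		struct netmap_slot *slot = &ring->slot[ring->cur];
 *		// fill NETMAP_BUF(ring, slot->buf_idx) and set slot->len
 *		ring->head = ring->cur = nm_ring_next(ring, ring->cur);
 *	}
 *	ioctl(d->fd, NIOCTXSYNC, NULL);	// ends up in generic_netmap_txsync()
 */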

#ifdef __FreeBSD__

#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD: releng/11.0/sys/dev/netmap/netmap_generic.c 298955 2016-05-03 03:41:25Z pfg $");

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/lock.h>   /* PROT_EXEC */
#include <sys/rwlock.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h>        /* bus_dmamap_* in netmap_kern.h */

// XXX temporary - D() defined here
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>

#define rtnl_lock()	ND("rtnl_lock called")
#define rtnl_unlock()	ND("rtnl_unlock called")
#define MBUF_TXQ(m)	((m)->m_pkthdr.flowid)
#define MBUF_RXQ(m)	((m)->m_pkthdr.flowid)
#define smp_mb()

/*
 * FreeBSD mbuf allocator/deallocator in emulation mode:
 *
 * We allocate EXT_PACKET mbuf+clusters, but need to set M_NOFREE
 * so that the destructor, if invoked, will not free the packet.
 * In principle we should set the destructor only on demand,
 * but since there might be a race we better do it on allocation.
 * As a consequence, we also need to set the destructor or we
 * would leak buffers.
 */

/*
 * mbuf wrappers
 */

/* mbuf destructor: we also need to change the type to EXT_EXTREF,
 * add an M_NOFREE flag, and then clear the flag and
 * chain into uma_zfree(zone_pack, mf)
 * (or reinstall the buffer ?)
 */
#define SET_MBUF_DESTRUCTOR(m, fn)	do {		\
	(m)->m_ext.ext_free = (void *)fn;		\
	(m)->m_ext.ext_type = EXT_EXTREF;		\
} while (0)

static void
netmap_default_mbuf_destructor(struct mbuf *m)
{
	/* restore original mbuf */
	m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg1;
	m->m_ext.ext_arg1 = NULL;
	m->m_ext.ext_type = EXT_PACKET;
	m->m_ext.ext_free = NULL;
	if (GET_MBUF_REFCNT(m) == 0)
		SET_MBUF_REFCNT(m, 1);
	uma_zfree(zone_pack, m);
}

static inline struct mbuf *
netmap_get_mbuf(int len)
{
	struct mbuf *m;
	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
	if (m) {
		m->m_flags |= M_NOFREE;	/* XXXNP: Almost certainly incorrect. */
		m->m_ext.ext_arg1 = m->m_ext.ext_buf; // XXX save
		m->m_ext.ext_free = (void *)netmap_default_mbuf_destructor;
		m->m_ext.ext_type = EXT_EXTREF;
		ND(5, "create m %p refcnt %d", m, GET_MBUF_REFCNT(m));
	}
	return m;
}
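
/*
 * Lifecycle of a pool mbuf (an illustrative sketch):
 *
 *	m = netmap_get_mbuf(len);	// refcount 1, destructor armed
 *	// txsync copies the netmap buffer in and transmits it; the
 *	// per-OS xmit helper (not in this file) keeps an extra
 *	// reference, so the driver's m_freem() only drops the count
 *	// to 1 and the mbuf survives for reuse. When a completion
 *	// event is wanted, generic_set_tx_event() installs
 *	// generic_mbuf_destructor() and drops one reference:
 *	SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
 *	m_freem(m);	// the last free runs the destructor, which
 *			// acts as an emulated TX interrupt
 */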


#else /* linux */

#include "bsd_glue.h"

#include <linux/rtnetlink.h>    /* rtnl_[un]lock() */
#include <linux/ethtool.h>      /* struct ethtool_ops, get_ringparam */
#include <linux/hrtimer.h>

//#define REG_RESET

#endif /* linux */


/* Common headers. */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>



/* ======================== usage stats =========================== */

#ifdef RATE_GENERIC
#define IFRATE(x) x
struct rate_stats {
	unsigned long txpkt;
	unsigned long txsync;
	unsigned long txirq;
	unsigned long rxpkt;
	unsigned long rxirq;
	unsigned long rxsync;
};

struct rate_context {
	unsigned refcount;
	struct timer_list timer;
	struct rate_stats new;
	struct rate_stats old;
};

#define RATE_PRINTK(_NAME_) \
	printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD);
#define RATE_PERIOD  2
static void rate_callback(unsigned long arg)
{
	struct rate_context * ctx = (struct rate_context *)arg;
	struct rate_stats cur = ctx->new;
	int r;

	RATE_PRINTK(txpkt);
	RATE_PRINTK(txsync);
	RATE_PRINTK(txirq);
	RATE_PRINTK(rxpkt);
	RATE_PRINTK(rxsync);
	RATE_PRINTK(rxirq);
	printk("\n");

	ctx->old = cur;
	r = mod_timer(&ctx->timer, jiffies +
			msecs_to_jiffies(RATE_PERIOD * 1000));
	if (unlikely(r))
		D("[v1000] Error: mod_timer()");
}

static struct rate_context rate_ctx;

void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi)
{
	if (txp) rate_ctx.new.txpkt++;
	if (txs) rate_ctx.new.txsync++;
	if (txi) rate_ctx.new.txirq++;
	if (rxp) rate_ctx.new.rxpkt++;
	if (rxs) rate_ctx.new.rxsync++;
	if (rxi) rate_ctx.new.rxirq++;
}

#else /* !RATE */
#define IFRATE(x)
#endif /* !RATE */


/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */

/*
 * Wrapper used by the generic adapter layer to notify
 * the poller threads. Unlike netmap_rx_irq(), we check
 * only NAF_NETMAP_ON instead of NAF_NATIVE_ON to enable the irq.
 */
static void
netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
{
	struct netmap_adapter *na = NA(ifp);
	if (unlikely(!nm_netmap_on(na)))
		return;

	netmap_common_irq(ifp, q, work_done);
}


/* Enable/disable netmap mode for a generic network interface. */
static int
generic_netmap_register(struct netmap_adapter *na, int enable)
{
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
	struct mbuf *m;
	int error;
	int i, r;

	if (!na)
		return EINVAL;

#ifdef REG_RESET
	error = ifp->netdev_ops->ndo_stop(ifp);
	if (error) {
		return error;
	}
#endif /* REG_RESET */

	if (enable) { /* Enable netmap mode. */
		/* Init the mitigation support on all the rx queues. */
		gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
					M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!gna->mit) {
			D("mitigation allocation failed");
			error = ENOMEM;
			goto out;
		}
		for (r=0; r<na->num_rx_rings; r++)
			netmap_mitigation_init(&gna->mit[r], r, na);

		/* Initialize the rx queue, as generic_rx_handler() can
		 * be called as soon as netmap_catch_rx() returns.
		 */
		for (r=0; r<na->num_rx_rings; r++) {
			mbq_safe_init(&na->rx_rings[r].rx_queue);
		}

		/*
		 * Preallocate packet buffers for the tx rings.
		 */
		for (r=0; r<na->num_tx_rings; r++)
			na->tx_rings[r].tx_pool = NULL;
		for (r=0; r<na->num_tx_rings; r++) {
			na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *),
					M_DEVBUF, M_NOWAIT | M_ZERO);
			if (!na->tx_rings[r].tx_pool) {
				D("tx_pool allocation failed");
				error = ENOMEM;
				goto free_tx_pools;
			}
			for (i=0; i<na->num_tx_desc; i++)
				na->tx_rings[r].tx_pool[i] = NULL;
			for (i=0; i<na->num_tx_desc; i++) {
				m = netmap_get_mbuf(NETMAP_BUF_SIZE(na));
				if (!m) {
					D("tx_pool[%d] allocation failed", i);
					error = ENOMEM;
					goto free_tx_pools;
				}
				na->tx_rings[r].tx_pool[i] = m;
			}
		}
		rtnl_lock();
		/* Prepare to intercept incoming traffic. */
		error = netmap_catch_rx(gna, 1);
		if (error) {
			D("netdev_rx_handler_register() failed (%d)", error);
			goto register_handler;
		}
		na->na_flags |= NAF_NETMAP_ON;

		/* Make netmap control the packet steering. */
		netmap_catch_tx(gna, 1);

		rtnl_unlock();

#ifdef RATE_GENERIC
		if (rate_ctx.refcount == 0) {
			D("setup_timer()");
			memset(&rate_ctx, 0, sizeof(rate_ctx));
			setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
			if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
				D("Error: mod_timer()");
			}
		}
		rate_ctx.refcount++;
#endif /* RATE */

	} else if (na->tx_rings[0].tx_pool) {
		/* Disable netmap mode. We enter here only if the previous
		   generic_netmap_register(na, 1) was successful.
		   If it was not, na->tx_rings[0].tx_pool was set to NULL by the
		   error handling code below. */
		rtnl_lock();

		na->na_flags &= ~NAF_NETMAP_ON;

		/* Release packet steering control. */
		netmap_catch_tx(gna, 0);

		/* Do not intercept packets on the rx path. */
		netmap_catch_rx(gna, 0);

		rtnl_unlock();

		/* Free the mbufs going to the netmap rings */
		for (r=0; r<na->num_rx_rings; r++) {
			mbq_safe_purge(&na->rx_rings[r].rx_queue);
			mbq_safe_destroy(&na->rx_rings[r].rx_queue);
		}

		for (r=0; r<na->num_rx_rings; r++)
			netmap_mitigation_cleanup(&gna->mit[r]);
		free(gna->mit, M_DEVBUF);

		for (r=0; r<na->num_tx_rings; r++) {
			for (i=0; i<na->num_tx_desc; i++) {
				m_freem(na->tx_rings[r].tx_pool[i]);
			}
			free(na->tx_rings[r].tx_pool, M_DEVBUF);
		}

#ifdef RATE_GENERIC
		if (--rate_ctx.refcount == 0) {
			D("del_timer()");
			del_timer(&rate_ctx.timer);
		}
#endif
	}

#ifdef REG_RESET
	error = ifp->netdev_ops->ndo_open(ifp);
	if (error) {
		goto free_tx_pools;
	}
#endif

	return 0;

register_handler:
	rtnl_unlock();
free_tx_pools:
	for (r=0; r<na->num_tx_rings; r++) {
		if (na->tx_rings[r].tx_pool == NULL)
			continue;
		for (i=0; i<na->num_tx_desc; i++)
			if (na->tx_rings[r].tx_pool[i])
				m_freem(na->tx_rings[r].tx_pool[i]);
		free(na->tx_rings[r].tx_pool, M_DEVBUF);
		na->tx_rings[r].tx_pool = NULL;
	}
	for (r=0; r<na->num_rx_rings; r++) {
		netmap_mitigation_cleanup(&gna->mit[r]);
		mbq_safe_destroy(&na->rx_rings[r].rx_queue);
	}
	free(gna->mit, M_DEVBUF);
out:

	return error;
}
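
/*
 * Register lifecycle, for reference (summarizing the code above):
 * NIOCREGIF -> generic_netmap_register(na, 1) allocates the mitigation
 * state, the per-ring rx_queue and the tx_pool *before* hooking the
 * RX/TX paths, because generic_rx_handler() may fire as soon as
 * netmap_catch_rx() returns. Teardown runs the same steps in reverse,
 * and is only attempted when tx_rings[0].tx_pool shows that a previous
 * registration succeeded.
 */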

/*
 * Callback invoked when the device driver frees an mbuf used
 * by netmap to transmit a packet. This usually happens when
 * the NIC notifies the driver that transmission is completed.
 */
static void
generic_mbuf_destructor(struct mbuf *m)
{
	netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
#ifdef __FreeBSD__
	if (netmap_verbose)
		RD(5, "Tx irq (%p) queue %d index %d" , m, MBUF_TXQ(m),
		    (int)(uintptr_t)m->m_ext.ext_arg1);
	netmap_default_mbuf_destructor(m);
#endif /* __FreeBSD__ */
	IFRATE(rate_ctx.new.txirq++);
}

extern int netmap_adaptive_io;

/* Record completed transmissions and update hwtail.
 *
 * The oldest tx buffer not yet completed is at nr_hwtail + 1,
 * nr_hwcur is the first unsent buffer.
 */
static u_int
generic_netmap_tx_clean(struct netmap_kring *kring)
{
	u_int const lim = kring->nkr_num_slots - 1;
	u_int nm_i = nm_next(kring->nr_hwtail, lim);
	u_int hwcur = kring->nr_hwcur;
	u_int n = 0;
	struct mbuf **tx_pool = kring->tx_pool;

	while (nm_i != hwcur) { /* buffers not completed */
		struct mbuf *m = tx_pool[nm_i];

		if (unlikely(m == NULL)) {
			/* this is done, try to replenish the entry */
			tx_pool[nm_i] = m = netmap_get_mbuf(NETMAP_BUF_SIZE(kring->na));
			if (unlikely(m == NULL)) {
				D("mbuf allocation failed, XXX error");
				// XXX how do we proceed ? break ?
				return -ENOMEM;
			}
		} else if (GET_MBUF_REFCNT(m) != 1) {
			break; /* This mbuf is still busy: its refcnt is 2. */
		}
		n++;
		nm_i = nm_next(nm_i, lim);
#if 0 /* rate adaptation */
		if (netmap_adaptive_io > 1) {
			if (n >= netmap_adaptive_io)
				break;
		} else if (netmap_adaptive_io) {
			/* if hwcur - nm_i < lim/8 do an early break
			 * so we prevent the sender from stalling. See CVT.
			 */
			if (hwcur >= nm_i) {
				if (hwcur - nm_i < lim/2)
					break;
			} else {
				if (hwcur + lim + 1 - nm_i < lim/2)
					break;
			}
		}
#endif
	}
	kring->nr_hwtail = nm_prev(nm_i, lim);
	ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);

	return n;
}
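
/*
 * Worked example for the loop above (hypothetical numbers): with
 * nkr_num_slots = 256, nr_hwtail = 9 and nr_hwcur = 200, the scan
 * starts at slot 10. Suppose slots 10..49 hold mbufs whose refcount
 * has dropped back to 1 (the driver released its reference) and slot
 * 50 is still at refcount 2: the loop stops there, n = 40 slots are
 * reclaimed, and nr_hwtail becomes 49.
 */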

/*
 * We have pending packets in the driver between nr_hwtail + 1 and hwcur.
 * Compute a position in the middle, to be used to generate
 * a notification.
 */
static inline u_int
generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
{
	u_int n = kring->nkr_num_slots;
	u_int ntc = nm_next(kring->nr_hwtail, n-1);
	u_int e;

	if (hwcur >= ntc) {
		e = (hwcur + ntc) / 2;
	} else { /* wrap around */
		e = (hwcur + n + ntc) / 2;
		if (e >= n) {
			e -= n;
		}
	}

	if (unlikely(e >= n)) {
		D("This cannot happen");
		e = 0;
	}

	return e;
}
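
/*
 * Example (hypothetical values): with n = 256, nr_hwtail = 10 and
 * hwcur = 200, ntc = 11 and e = (200 + 11) / 2 = 105, halfway through
 * the pending region. In the wrapped case nr_hwtail = 200, hwcur = 50:
 * ntc = 201 and e = (50 + 256 + 201) / 2 = 253, still inside the
 * pending region [ntc, hwcur] modulo the ring size.
 */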

/*
 * We have pending packets in the driver between nr_hwtail + 1 and hwcur.
 * Schedule a notification approximately in the middle of the two.
 * There is a race but this is only called within txsync which does
 * a double check.
 */
static void
generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
{
	struct mbuf *m;
	u_int e;

	if (nm_next(kring->nr_hwtail, kring->nkr_num_slots -1) == hwcur) {
		return; /* all buffers are free */
	}
	e = generic_tx_event_middle(kring, hwcur);

	m = kring->tx_pool[e];
	ND(5, "Request Event at %d mbuf %p refcnt %d", e, m, m ? GET_MBUF_REFCNT(m) : -2 );
	if (m == NULL) {
		/* This can happen if there is already an event on the netmap
		   slot 'e': There is nothing to do. */
		return;
	}
	kring->tx_pool[e] = NULL;
	SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);

	// XXX wmb() ?
	/* Decrement the refcount and free it if we have the last one. */
	m_freem(m);
	smp_mb();
}


/*
 * generic_netmap_txsync() transforms netmap buffers into mbufs
 * and passes them to the standard device driver
 * (ndo_start_xmit() or ifp->if_transmit() ).
 * On linux this is not done directly, but using dev_queue_xmit(),
 * since it implements the TX flow control (and takes some locks).
 */
static int
generic_netmap_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */ // j
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	u_int ring_nr = kring->ring_id;

	IFRATE(rate_ctx.new.txsync++);

	// TODO: handle the case of mbuf allocation failure

	rmb();

	/*
	 * First part: process new packets to send.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {	/* we have new packets to send */
		while (nm_i != head) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			void *addr = NMB(na, slot);

			/* device-specific */
			struct mbuf *m;
			int tx_ret;

			NM_CHECK_ADDR_LEN(na, addr, len);

			/* Take an mbuf from the tx pool and copy in the user packet. */
			m = kring->tx_pool[nm_i];
			if (unlikely(!m)) {
				RD(5, "This should never happen");
				kring->tx_pool[nm_i] = m = netmap_get_mbuf(NETMAP_BUF_SIZE(na));
				if (unlikely(m == NULL)) {
					D("mbuf allocation failed");
					break;
				}
			}
			/* XXX we should ask notifications when NS_REPORT is set,
			 * or roughly every half frame. We can optimize this
			 * by lazily requesting notifications only when a
			 * transmission fails. Probably the best way is to
			 * break on failures and set notifications when
			 * ring->cur == ring->tail || nm_i != cur
			 */
			tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
			if (unlikely(tx_ret)) {
				ND(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]",
						tx_ret, nm_i, head, kring->nr_hwtail);
				/*
				 * No room for this mbuf in the device driver.
				 * Request a notification FOR A PREVIOUS MBUF,
				 * then call generic_netmap_tx_clean(kring) to do the
				 * double check and see if we can free more buffers.
				 * If there is space continue, else break;
				 * NOTE: the double check is necessary if the problem
				 * occurs in the txsync call after selrecord().
				 * Also, we need some way to tell the caller that not
				 * all buffers were queued onto the device (this was
				 * not a problem with native netmap driver where space
				 * is preallocated). The bridge has a similar problem
				 * and we solve it there by dropping the excess packets.
				 */
				generic_set_tx_event(kring, nm_i);
				if (generic_netmap_tx_clean(kring)) { /* space now available */
					continue;
				} else {
					break;
				}
			}
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
			nm_i = nm_next(nm_i, lim);
			IFRATE(rate_ctx.new.txpkt ++);
		}

		/* Update hwcur to the next slot to transmit. */
		kring->nr_hwcur = nm_i; /* not head, we could break early */
	}

	/*
	 * Second part: reclaim completed buffers
	 */
	if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
		/* No more available slots? Set a notification event
		 * on a netmap slot that will be cleaned in the future.
		 * No doublecheck is performed, since txsync() will be
		 * called twice by netmap_poll().
		 */
		generic_set_tx_event(kring, nm_i);
	}
	ND("tx #%d, hwtail = %d", n, kring->nr_hwtail);

	generic_netmap_tx_clean(kring);

	return 0;
}


/*
 * This handler is registered (through netmap_catch_rx())
 * within the attached network interface
 * in the RX subsystem, so that every mbuf passed up by
 * the driver can be stolen before it reaches the network stack.
 * Stolen packets are put in a queue where the
 * generic_netmap_rxsync() callback can extract them.
 */
void
generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
	u_int work_done;
	u_int rr = MBUF_RXQ(m); // receive ring number

	if (rr >= na->num_rx_rings) {
		rr = rr % na->num_rx_rings; // XXX expensive...
	}

	/* limit the size of the queue */
	if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
		m_freem(m);
	} else {
		mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
	}

	if (netmap_generic_mit < 32768) {
		/* no rx mitigation, pass notification up */
		netmap_generic_irq(na->ifp, rr, &work_done);
		IFRATE(rate_ctx.new.rxirq++);
	} else {
		/* same as send combining: filter the notification if there is
		 * a pending timer, otherwise pass it up and start a timer.
		 */
		if (likely(netmap_mitigation_active(&gna->mit[rr]))) {
			/* Record that there is some pending work. */
			gna->mit[rr].mit_pending = 1;
		} else {
			netmap_generic_irq(na->ifp, rr, &work_done);
			IFRATE(rate_ctx.new.rxirq++);
			netmap_mitigation_start(&gna->mit[rr]);
		}
	}
}
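
/*
 * Mitigation example (values are illustrative; netmap_generic_mit is
 * the knob controlling the RX notification interval, in nanoseconds):
 * below 32768 ns every stolen packet wakes the poller immediately.
 * At, say, 100000 ns the first packet of a burst delivers a
 * notification and starts a 100 us timer; packets arriving while the
 * timer is pending only set mit_pending, so the whole burst is
 * coalesced into one wakeup per interval.
 */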

/*
 * generic_netmap_rxsync() extracts mbufs from the queue filled by
 * generic_rx_handler() and puts their content in the netmap
 * receive ring.
 * Access must be protected because the rx handler is asynchronous.
 */
static int
generic_netmap_rxsync(struct netmap_kring *kring, int flags)
{
	struct netmap_ring *ring = kring->ring;
	struct netmap_adapter *na = kring->na;
	u_int nm_i;	/* index into the netmap ring */ //j,
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

	if (head > lim)
		return netmap_ring_reinit(kring);

	/*
	 * First part: import newly received packets.
	 */
	if (netmap_no_pendintr || force_update) {
		/* extract buffers from the rx queue, stop at most one
		 * slot before nr_hwcur (stop_i)
		 */
		uint16_t slot_flags = kring->nkr_slot_flags;
		u_int stop_i = nm_prev(kring->nr_hwcur, lim);

		nm_i = kring->nr_hwtail; /* first empty slot in the receive ring */
		for (n = 0; nm_i != stop_i; n++) {
			int len;
			void *addr = NMB(na, &ring->slot[nm_i]);
			struct mbuf *m;

			/* we only check the address here on generic rx rings */
			if (addr == NETMAP_BUF_BASE(na)) { /* Bad buffer */
				return netmap_ring_reinit(kring);
			}
			/*
			 * Call the locked version of the function.
			 * XXX Ideally we could grab a batch of mbufs at once
			 * and save some locking overhead.
			 */
			m = mbq_safe_dequeue(&kring->rx_queue);
			if (!m)	/* no more data */
				break;
			len = MBUF_LEN(m);
			m_copydata(m, 0, len, addr);
			ring->slot[nm_i].len = len;
			ring->slot[nm_i].flags = slot_flags;
			m_freem(m);
			nm_i = nm_next(nm_i, lim);
		}
		if (n) {
			kring->nr_hwtail = nm_i;
			IFRATE(rate_ctx.new.rxpkt += n);
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	// XXX should we invert the order ?
	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		/* Userspace has released some packets. */
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];

			slot->flags &= ~NS_BUF_CHANGED;
			nm_i = nm_next(nm_i, lim);
		}
		kring->nr_hwcur = head;
	}
	IFRATE(rate_ctx.new.rxsync++);

	return 0;
}

static void
generic_netmap_dtor(struct netmap_adapter *na)
{
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
	struct ifnet *ifp = netmap_generic_getifp(gna);
	struct netmap_adapter *prev_na = gna->prev;

	if (prev_na != NULL) {
		D("Released generic NA %p", gna);
		if_rele(ifp);
		netmap_adapter_put(prev_na);
		if (na->ifp == NULL) {
			/*
			 * The driver has been removed without releasing
			 * the reference so we need to do it here.
			 */
			netmap_adapter_put(prev_na);
		}
	}
	WNA(ifp) = prev_na;
	D("Restored native NA %p", prev_na);
	na->ifp = NULL;
}

/*
 * generic_netmap_attach() makes it possible to use netmap on
 * a device without native netmap support.
 * This is less performant than native support but potentially
 * faster than raw sockets or similar schemes.
 *
 * In this "emulated" mode, netmap rings do not necessarily
 * have the same size as those in the NIC. We use a default
 * value and possibly override it if the OS has ways to fetch the
 * actual configuration.
 */
int
generic_netmap_attach(struct ifnet *ifp)
{
	struct netmap_adapter *na;
	struct netmap_generic_adapter *gna;
	int retval;
	u_int num_tx_desc, num_rx_desc;

	num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */

	generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); /* ignore errors */
	ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
	if (num_tx_desc == 0 || num_rx_desc == 0) {
		D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc);
		return EINVAL;
	}

	gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (gna == NULL) {
		D("no memory on attach, give up");
		return ENOMEM;
	}
	na = (struct netmap_adapter *)gna;
	strncpy(na->name, ifp->if_xname, sizeof(na->name));
	na->ifp = ifp;
	na->num_tx_desc = num_tx_desc;
	na->num_rx_desc = num_rx_desc;
	na->nm_register = &generic_netmap_register;
	na->nm_txsync = &generic_netmap_txsync;
	na->nm_rxsync = &generic_netmap_rxsync;
	na->nm_dtor = &generic_netmap_dtor;
	/* when using generic, NAF_NETMAP_ON is set so we force
	 * NAF_SKIP_INTR to use the regular interrupt handler
	 */
	na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS;

	ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
			ifp->num_tx_queues, ifp->real_num_tx_queues,
			ifp->tx_queue_len);
	ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
			ifp->num_rx_queues, ifp->real_num_rx_queues);

	generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);

	retval = netmap_attach_common(na);
	if (retval) {
		free(gna, M_DEVBUF);
	}

	return retval;
}
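
/*
 * Tuning note (a sketch of typical usage; names assume the standard
 * netmap sysctls): emulated adapters are created on demand when an
 * application opens a NIC without native support, and the ring size
 * falls back to netmap_generic_ringsize when the actual NIC
 * configuration cannot be fetched. On FreeBSD, for example:
 *
 *	sysctl dev.netmap.admode=1		# native only, no emulation
 *	sysctl dev.netmap.generic_ringsize=4096	# larger emulated rings
 *	sysctl dev.netmap.generic_mit=100000	# RX mitigation interval (ns)
 */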