/*
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This module implements netmap support on top of standard,
 * unmodified device drivers.
 *
 * A NIOCREGIF request is handled here if the device does not
 * have native support. TX and RX rings are emulated as follows:
 *
 * NIOCREGIF
 *	We preallocate a block of TX mbufs (roughly as many as
 *	tx descriptors; the number is not critical) to speed up
 *	operation during transmissions. The refcount on most of
 *	these buffers is artificially bumped up so we can recycle
 *	them more easily. Also, the destructor is intercepted
 *	so we use it as an interrupt notification to wake up
 *	processes blocked on a poll().
 *
 *	For each receive ring we allocate one "struct mbq"
 *	(an mbuf tailq plus a spinlock). We intercept packets
 *	(through if_input) on the receive path and put them
 *	in the mbq from which netmap receive routines can
 *	grab them.
 *
 * TX:
 *	in the generic_txsync() routine, netmap buffers are copied
 *	(or linked, in the future) to the preallocated mbufs
 *	and pushed to the transmit queue. Some of these mbufs
 *	(those with NS_REPORT, or otherwise every half ring)
 *	have the refcount=1, others have refcount=2.
 *	When the destructor is invoked, we take that as
 *	a notification that all mbufs up to that one in
 *	the specific ring have been completed, and generate
 *	the equivalent of a transmit interrupt.
 *
 * RX:
 *	each mbuf intercepted by generic_rx_handler() is queued
 *	into the ring's mbq; generic_netmap_rxsync() later copies
 *	its payload into a netmap buffer and frees the mbuf.
 */
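/*
 * Worked example of the TX scheme above (slot numbers are purely
 * illustrative): with an 8-slot ring, suppose slots 0..5 have been
 * pushed to the driver and a completion event was placed on slot 3.
 * tx_pool[3] was handed to the driver with refcount 1 and
 * generic_mbuf_destructor() installed, and its pool entry cleared,
 * while the other slots keep refcount 2, so the driver's m_freem()
 * merely drops them back to 1. When the driver eventually frees
 * mbuf 3, the destructor fires (the emulated transmit interrupt)
 * and generic_netmap_tx_clean() can advance nr_hwtail over every
 * slot already released by the driver.
 */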
#ifdef __FreeBSD__

#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD: stable/10/sys/dev/netmap/netmap_generic.c 262152 2014-02-18 05:46:19Z luigi $");

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/lock.h>   /* PROT_EXEC */
#include <sys/rwlock.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h>        /* bus_dmamap_* in netmap_kern.h */

// XXX temporary - D() defined here
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>

#define rtnl_lock()	D("rtnl_lock called");
#define rtnl_unlock()	D("rtnl_unlock called");
#define MBUF_TXQ(m)	((m)->m_pkthdr.flowid)
#define MBUF_RXQ(m)	((m)->m_pkthdr.flowid)
#define smp_mb()

/*
 * mbuf wrappers
 */

/*
 * we allocate an EXT_PACKET
 */
#define netmap_get_mbuf(len) m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR|M_NOFREE)

/* mbuf destructor: we also need to change the type to EXT_EXTREF,
 * add an M_NOFREE flag, and then clear the flag and
 * chain into uma_zfree(zone_pack, mf)
 * (or reinstall the buffer ?)
 */
#define SET_MBUF_DESTRUCTOR(m, fn)	do {	\
	(m)->m_ext.ext_free = (void *)fn;	\
	(m)->m_ext.ext_type = EXT_EXTREF;	\
} while (0)

#define GET_MBUF_REFCNT(m)	((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1)
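/*
 * Lifecycle sketch for a tx mbuf carrying an event (illustrative;
 * the real call sites are generic_set_tx_event() and
 * generic_mbuf_destructor() below):
 *
 *	m = netmap_get_mbuf(GENERIC_BUF_SIZE);	// EXT_PACKET, M_NOFREE
 *	...					// handed to the driver for tx
 *	SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
 *	m_freem(m);				// drop netmap's reference
 *	...					// the driver frees its reference:
 *						// the destructor runs, restores
 *						// EXT_PACKET and returns the mbuf
 *						// via uma_zfree(zone_pack, m)
 */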
#else /* linux */

#include "bsd_glue.h"

#include <linux/rtnetlink.h>    /* rtnl_[un]lock() */
#include <linux/ethtool.h>      /* struct ethtool_ops, get_ringparam */
#include <linux/hrtimer.h>

//#define RATE  /* Enables communication statistics. */

//#define REG_RESET

#endif /* linux */


/* Common headers. */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>



/* ======================== usage stats =========================== */

#ifdef RATE
#define IFRATE(x) x
struct rate_stats {
	unsigned long txpkt;
	unsigned long txsync;
	unsigned long txirq;
	unsigned long rxpkt;
	unsigned long rxirq;
	unsigned long rxsync;
};

struct rate_context {
	unsigned refcount;
	struct timer_list timer;
	struct rate_stats new;
	struct rate_stats old;
};

#define RATE_PRINTK(_NAME_) \
	printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD);
#define RATE_PERIOD  2
static void rate_callback(unsigned long arg)
{
	struct rate_context * ctx = (struct rate_context *)arg;
	struct rate_stats cur = ctx->new;
	int r;

	RATE_PRINTK(txpkt);
	RATE_PRINTK(txsync);
	RATE_PRINTK(txirq);
	RATE_PRINTK(rxpkt);
	RATE_PRINTK(rxsync);
	RATE_PRINTK(rxirq);
	printk("\n");

	ctx->old = cur;
	r = mod_timer(&ctx->timer, jiffies +
			msecs_to_jiffies(RATE_PERIOD * 1000));
	if (unlikely(r))
		D("[v1000] Error: mod_timer()");
}

static struct rate_context rate_ctx;

#else /* !RATE */
#define IFRATE(x)
#endif /* !RATE */


/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */
#define GENERIC_BUF_SIZE	netmap_buf_size	/* Size of the mbufs in the Tx pool. */

/*
 * Wrapper used by the generic adapter layer to notify
 * the poller threads. Unlike netmap_rx_irq(), we check
 * only IFCAP_NETMAP instead of NAF_NATIVE_ON to enable the irq.
 */
static void
netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
{
	if (unlikely(!(ifp->if_capenable & IFCAP_NETMAP)))
		return;

	netmap_common_irq(ifp, q, work_done);
}
/* Enable/disable netmap mode for a generic network interface. */
static int
generic_netmap_register(struct netmap_adapter *na, int enable)
{
	struct ifnet *ifp = na->ifp;
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
	struct mbuf *m;
	int error;
	int i, r;

	if (!na)
		return EINVAL;

#ifdef REG_RESET
	error = ifp->netdev_ops->ndo_stop(ifp);
	if (error) {
		return error;
	}
#endif /* REG_RESET */

	if (enable) { /* Enable netmap mode. */
		/* Init the mitigation support. */
		gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
					M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!gna->mit) {
			D("mitigation allocation failed");
			error = ENOMEM;
			goto out;
		}
		for (r=0; r<na->num_rx_rings; r++)
			netmap_mitigation_init(&gna->mit[r], na);

		/* Initialize the rx queue, as generic_rx_handler() can
		 * be called as soon as netmap_catch_rx() returns.
		 */
		for (r=0; r<na->num_rx_rings; r++) {
			mbq_safe_init(&na->rx_rings[r].rx_queue);
		}

		/*
		 * Preallocate packet buffers for the tx rings.
		 */
		for (r=0; r<na->num_tx_rings; r++)
			na->tx_rings[r].tx_pool = NULL;
		for (r=0; r<na->num_tx_rings; r++) {
			na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *),
					M_DEVBUF, M_NOWAIT | M_ZERO);
			if (!na->tx_rings[r].tx_pool) {
				D("tx_pool allocation failed");
				error = ENOMEM;
				goto free_tx_pools;
			}
			for (i=0; i<na->num_tx_desc; i++)
				na->tx_rings[r].tx_pool[i] = NULL;
			for (i=0; i<na->num_tx_desc; i++) {
				m = netmap_get_mbuf(GENERIC_BUF_SIZE);
				if (!m) {
					D("tx_pool[%d] allocation failed", i);
					error = ENOMEM;
					goto free_tx_pools;
				}
				na->tx_rings[r].tx_pool[i] = m;
			}
		}
		rtnl_lock();
		/* Prepare to intercept incoming traffic. */
		error = netmap_catch_rx(na, 1);
		if (error) {
			D("netdev_rx_handler_register() failed (%d)", error);
			goto register_handler;
		}
		ifp->if_capenable |= IFCAP_NETMAP;

		/* Make netmap control the packet steering. */
		netmap_catch_tx(gna, 1);

		rtnl_unlock();

#ifdef RATE
		if (rate_ctx.refcount == 0) {
			D("setup_timer()");
			memset(&rate_ctx, 0, sizeof(rate_ctx));
			setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
			if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
				D("Error: mod_timer()");
			}
		}
		rate_ctx.refcount++;
#endif /* RATE */

	} else if (na->tx_rings[0].tx_pool) {
		/* Disable netmap mode. We enter here only if the previous
		 * generic_netmap_register(na, 1) was successful.
		 * If it was not, na->tx_rings[0].tx_pool was set to NULL by the
		 * error handling code below.
		 */
		rtnl_lock();

		ifp->if_capenable &= ~IFCAP_NETMAP;

		/* Release packet steering control. */
		netmap_catch_tx(gna, 0);

		/* Do not intercept packets on the rx path. */
		netmap_catch_rx(na, 0);

		rtnl_unlock();

		/* Free the mbufs going to the netmap rings */
		for (r=0; r<na->num_rx_rings; r++) {
			mbq_safe_purge(&na->rx_rings[r].rx_queue);
			mbq_safe_destroy(&na->rx_rings[r].rx_queue);
		}

		for (r=0; r<na->num_rx_rings; r++)
			netmap_mitigation_cleanup(&gna->mit[r]);
		free(gna->mit, M_DEVBUF);

		for (r=0; r<na->num_tx_rings; r++) {
			for (i=0; i<na->num_tx_desc; i++) {
				m_freem(na->tx_rings[r].tx_pool[i]);
			}
			free(na->tx_rings[r].tx_pool, M_DEVBUF);
		}

#ifdef RATE
		if (--rate_ctx.refcount == 0) {
			D("del_timer()");
			del_timer(&rate_ctx.timer);
		}
#endif
	}

#ifdef REG_RESET
	error = ifp->netdev_ops->ndo_open(ifp);
	if (error) {
		goto free_tx_pools;
	}
#endif

	return 0;

register_handler:
	rtnl_unlock();
free_tx_pools:
	for (r=0; r<na->num_tx_rings; r++) {
		if (na->tx_rings[r].tx_pool == NULL)
			continue;
		for (i=0; i<na->num_tx_desc; i++)
			if (na->tx_rings[r].tx_pool[i])
				m_freem(na->tx_rings[r].tx_pool[i]);
		free(na->tx_rings[r].tx_pool, M_DEVBUF);
		na->tx_rings[r].tx_pool = NULL;
	}
	for (r=0; r<na->num_rx_rings; r++) {
		netmap_mitigation_cleanup(&gna->mit[r]);
		mbq_safe_destroy(&na->rx_rings[r].rx_queue);
	}
	free(gna->mit, M_DEVBUF);
out:

	return error;
}
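/*
 * Note: a successful generic_netmap_register(na, 1) leaves behind
 *  - gna->mit[]: one mitigation context per rx ring;
 *  - rx_rings[r].rx_queue: an initialized, lock-protected mbq;
 *  - tx_rings[r].tx_pool[]: num_tx_desc preallocated mbufs per ring;
 *  - the rx/tx intercept hooks installed and IFCAP_NETMAP set.
 * The disable branch and the error labels above release the same
 * resources, which is why the disable branch keys on
 * tx_rings[0].tx_pool being non-NULL.
 */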
/*
 * Callback invoked when the device driver frees an mbuf used
 * by netmap to transmit a packet. This usually happens when
 * the NIC notifies the driver that transmission is completed.
 */
static void
generic_mbuf_destructor(struct mbuf *m)
{
	if (netmap_verbose)
		D("Tx irq (%p) queue %d", m, MBUF_TXQ(m));
	netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
#ifdef __FreeBSD__
	m->m_ext.ext_type = EXT_PACKET;
	m->m_ext.ext_free = NULL;
	if (*(m->m_ext.ref_cnt) == 0)
		*(m->m_ext.ref_cnt) = 1;
	uma_zfree(zone_pack, m);
#endif /* __FreeBSD__ */
	IFRATE(rate_ctx.new.txirq++);
}

/* Record completed transmissions and update hwtail.
 *
 * The oldest tx buffer not yet completed is at nr_hwtail + 1,
 * nr_hwcur is the first unsent buffer.
 */
static u_int
generic_netmap_tx_clean(struct netmap_kring *kring)
{
	u_int const lim = kring->nkr_num_slots - 1;
	u_int nm_i = nm_next(kring->nr_hwtail, lim);
	u_int hwcur = kring->nr_hwcur;
	u_int n = 0;
	struct mbuf **tx_pool = kring->tx_pool;

	while (nm_i != hwcur) { /* buffers not completed */
		struct mbuf *m = tx_pool[nm_i];

		if (unlikely(m == NULL)) {
			/* this is done, try to replenish the entry */
			tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
			if (unlikely(m == NULL)) {
				D("mbuf allocation failed, XXX error");
				// XXX how do we proceed ? break ?
				return -ENOMEM;
			}
		} else if (GET_MBUF_REFCNT(m) != 1) {
			break; /* This mbuf is still busy: its refcnt is 2. */
		}
		n++;
		nm_i = nm_next(nm_i, lim);
	}
	kring->nr_hwtail = nm_prev(nm_i, lim);
	ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);

	return n;
}
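/*
 * Example of a tx_clean scan (illustrative): lim = 7, nr_hwtail = 2,
 * nr_hwcur = 7. The scan starts at slot 3; if tx_pool[3] is NULL
 * (an event slot already freed by the destructor) it is replenished
 * and counted, slots 4 and 5 with refcount 1 are counted as well,
 * and a refcount of 2 on slot 6 stops the scan. The result is n = 3
 * and nr_hwtail = 5.
 */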
/*
 * We have pending packets in the driver between nr_hwtail + 1 and hwcur.
 * Compute a position in the middle, to be used to generate
 * a notification.
 */
static inline u_int
generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
{
	u_int n = kring->nkr_num_slots;
	u_int ntc = nm_next(kring->nr_hwtail, n-1);
	u_int e;

	if (hwcur >= ntc) {
		e = (hwcur + ntc) / 2;
	} else { /* wrap around */
		e = (hwcur + n + ntc) / 2;
		if (e >= n) {
			e -= n;
		}
	}

	if (unlikely(e >= n)) {
		D("This cannot happen");
		e = 0;
	}

	return e;
}
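/*
 * E.g. (illustrative) with n = 8 slots: if nr_hwtail = 1 and
 * hwcur = 5, then ntc = 2 and e = (5 + 2) / 2 = 3. In the
 * wrap-around case nr_hwtail = 5, hwcur = 1: ntc = 6 and
 * e = (1 + 8 + 6) / 2 = 7, still between ntc and hwcur.
 */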
/*
 * We have pending packets in the driver between nr_hwtail + 1 and hwcur.
 * Schedule a notification approximately in the middle of the two.
 * There is a race but this is only called within txsync which does
 * a double check.
 */
static void
generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
{
	struct mbuf *m;
	u_int e;

	if (nm_next(kring->nr_hwtail, kring->nkr_num_slots - 1) == hwcur) {
		return; /* all buffers are free */
	}
	e = generic_tx_event_middle(kring, hwcur);

	m = kring->tx_pool[e];
	if (m == NULL) {
		/* This can happen if there is already an event on the netmap
		   slot 'e': there is nothing to do. */
		return;
	}
	ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
	kring->tx_pool[e] = NULL;
	SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);

	// XXX wmb() ?
	/* Decrement the refcount and free it if we have the last one. */
	m_freem(m);
	smp_mb();
}


/*
 * generic_netmap_txsync() transforms netmap buffers into mbufs
 * and passes them to the standard device driver
 * (ndo_start_xmit() or ifp->if_transmit()).
 * On linux this is not done directly, but using dev_queue_xmit(),
 * since it implements the TX flow control (and takes some locks).
 */
static int
generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct ifnet *ifp = na->ifp;
	struct netmap_kring *kring = &na->tx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */ // j
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;

	IFRATE(rate_ctx.new.txsync++);

	// TODO: handle the case of mbuf allocation failure

	rmb();

	/*
	 * First part: process new packets to send.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {	/* we have new packets to send */
		while (nm_i != head) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			void *addr = NMB(slot);

			/* device-specific */
			struct mbuf *m;
			int tx_ret;

			NM_CHECK_ADDR_LEN(addr, len);

			/* Take an mbuf from the tx pool and copy in the user packet. */
			m = kring->tx_pool[nm_i];
			if (unlikely(!m)) {
				RD(5, "This should never happen");
				kring->tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
				if (unlikely(m == NULL)) {
					D("mbuf allocation failed");
					break;
				}
			}
			/* XXX we should ask notifications when NS_REPORT is set,
			 * or roughly every half frame. We can optimize this
			 * by lazily requesting notifications only when a
			 * transmission fails. Probably the best way is to
			 * break on failures and set notifications when
			 * ring->cur == ring->tail || nm_i != cur
			 */
			tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
			if (unlikely(tx_ret)) {
				RD(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]",
						tx_ret, nm_i, head, kring->nr_hwtail);
				/*
				 * No room for this mbuf in the device driver.
				 * Request a notification FOR A PREVIOUS MBUF,
				 * then call generic_netmap_tx_clean(kring) to do the
				 * double check and see if we can free more buffers.
				 * If there is space continue, else break;
				 * NOTE: the double check is necessary if the problem
				 * occurs in the txsync call after selrecord().
				 * Also, we need some way to tell the caller that not
				 * all buffers were queued onto the device (this was
				 * not a problem with native netmap driver where space
				 * is preallocated). The bridge has a similar problem
				 * and we solve it there by dropping the excess packets.
				 */
				generic_set_tx_event(kring, nm_i);
				if (generic_netmap_tx_clean(kring)) { /* space now available */
					continue;
				} else {
					break;
				}
			}
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
			nm_i = nm_next(nm_i, lim);
			IFRATE(rate_ctx.new.txpkt++);
		}

		/* Update hwcur to the next slot to transmit. */
		kring->nr_hwcur = nm_i; /* not head, we could break early */
	}

	/*
	 * Second, reclaim completed buffers
	 */
	if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
		/* No more available slots? Set a notification event
		 * on a netmap slot that will be cleaned in the future.
		 * No doublecheck is performed, since txsync() will be
		 * called twice by netmap_poll().
		 */
		generic_set_tx_event(kring, nm_i);
	}
	ND("tx #%d, hwtail = %d", n, kring->nr_hwtail);

	generic_netmap_tx_clean(kring);

	nm_txsync_finalize(kring);

	return 0;
}
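/*
 * Note on the error path above: netmap_poll() calls txsync() both
 * before and after selrecord(), so a transmit failure handled with
 * generic_set_tx_event() + generic_netmap_tx_clean() in the first
 * call gets re-checked before the process actually sleeps. This is
 * the "double check" that generic_set_tx_event() relies upon.
 */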
/*
 * This handler is registered (through netmap_catch_rx())
 * within the attached network interface
 * in the RX subsystem, so that every mbuf passed up by
 * the driver can be stolen from the network stack.
 * Stolen packets are put in a queue where the
 * generic_netmap_rxsync() callback can extract them.
 */
void
generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
	u_int work_done;
	u_int rr = MBUF_RXQ(m); // receive ring number

	if (rr >= na->num_rx_rings) {
		rr = rr % na->num_rx_rings; // XXX expensive...
	}

	/* limit the size of the queue */
	if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
		m_freem(m);
	} else {
		mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
	}

	if (netmap_generic_mit < 32768) {
		/* no rx mitigation, pass notification up */
		netmap_generic_irq(na->ifp, rr, &work_done);
		IFRATE(rate_ctx.new.rxirq++);
	} else {
		/* same as send combining, filter notification if there is a
		 * pending timer, otherwise pass it up and start a timer.
		 */
		if (likely(netmap_mitigation_active(&gna->mit[rr]))) {
			/* Record that there is some pending work. */
			gna->mit[rr].mit_pending = 1;
		} else {
			netmap_generic_irq(na->ifp, rr, &work_done);
			IFRATE(rate_ctx.new.rxirq++);
			netmap_mitigation_start(&gna->mit[rr]);
		}
	}
}
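/*
 * For example (illustrative figures): with netmap_generic_mit set
 * to 100000 ns, the first mbuf of a burst on ring rr delivers an
 * immediate notification and starts the mitigation timer; later
 * mbufs of the burst only set mit_pending until the timer expires,
 * so userspace sees one wakeup per mitigation interval instead of
 * one per packet. How the pending flag is flushed is up to the
 * OS-specific mitigation callback.
 */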
/*
 * generic_netmap_rxsync() extracts mbufs from the queue filled by
 * generic_rx_handler() and puts their content in the netmap
 * receive ring.
 * Access must be protected because the rx handler is asynchronous.
 */
static int
generic_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */ //j,
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = nm_rxsync_prologue(kring);
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

	if (head > lim)
		return netmap_ring_reinit(kring);

	/*
	 * First part: import newly received packets.
	 */
	if (netmap_no_pendintr || force_update) {
		/* extract buffers from the rx queue, stop at most one
		 * slot before nr_hwcur (stop_i)
		 */
		uint16_t slot_flags = kring->nkr_slot_flags;
		u_int stop_i = nm_prev(kring->nr_hwcur, lim);

		nm_i = kring->nr_hwtail; /* first empty slot in the receive ring */
		for (n = 0; nm_i != stop_i; n++) {
			int len;
			void *addr = NMB(&ring->slot[nm_i]);
			struct mbuf *m;

			/* we only check the address here on generic rx rings */
			if (addr == netmap_buffer_base) { /* Bad buffer */
				return netmap_ring_reinit(kring);
			}
			/*
			 * Call the locked version of the function.
			 * XXX Ideally we could grab a batch of mbufs at once
			 * and save some locking overhead.
			 */
			m = mbq_safe_dequeue(&kring->rx_queue);
			if (!m)	/* no more data */
				break;
			len = MBUF_LEN(m);
			m_copydata(m, 0, len, addr);
			ring->slot[nm_i].len = len;
			ring->slot[nm_i].flags = slot_flags;
			m_freem(m);
			nm_i = nm_next(nm_i, lim);
		}
		if (n) {
			kring->nr_hwtail = nm_i;
			IFRATE(rate_ctx.new.rxpkt += n);
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	// XXX should we invert the order ?
	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		/* Userspace has released some packets. */
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];

			slot->flags &= ~NS_BUF_CHANGED;
			nm_i = nm_next(nm_i, lim);
		}
		kring->nr_hwcur = head;
	}
	/* tell userspace that there might be new packets. */
	nm_rxsync_finalize(kring);
	IFRATE(rate_ctx.new.rxsync++);

	return 0;
}
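/*
 * Index example (illustrative): with lim = 7, nr_hwcur = head = 2
 * and nr_hwtail = 5, stop_i is 1, so the first part can fill slots
 * 5, 6, 7 and 0 before stopping: one slot always separates nr_hwtail
 * from nr_hwcur. The second part then advances nr_hwcur only after
 * userspace moves head forward, releasing those slots.
 */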
static void
generic_netmap_dtor(struct netmap_adapter *na)
{
	struct ifnet *ifp = na->ifp;
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
	struct netmap_adapter *prev_na = gna->prev;

	if (prev_na != NULL) {
		D("Released generic NA %p", gna);
		if_rele(na->ifp);
		netmap_adapter_put(prev_na);
	}
	if (ifp != NULL) {
		WNA(ifp) = prev_na;
		D("Restored native NA %p", prev_na);
		na->ifp = NULL;
	}
}
/*
 * generic_netmap_attach() makes it possible to use netmap on
 * a device without native netmap support.
 * This is slower than native support but potentially
 * faster than raw sockets or similar schemes.
 *
 * In this "emulated" mode, netmap rings do not necessarily
 * have the same size as those in the NIC. We use a default
 * value and possibly override it if the OS has ways to fetch the
 * actual configuration.
 */
int
generic_netmap_attach(struct ifnet *ifp)
{
	struct netmap_adapter *na;
	struct netmap_generic_adapter *gna;
	int retval;
	u_int num_tx_desc, num_rx_desc;

	num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */

	generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc);
	ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);

	gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (gna == NULL) {
		D("no memory on attach, give up");
		return ENOMEM;
	}
	na = (struct netmap_adapter *)gna;
	na->ifp = ifp;
	na->num_tx_desc = num_tx_desc;
	na->num_rx_desc = num_rx_desc;
	na->nm_register = &generic_netmap_register;
	na->nm_txsync = &generic_netmap_txsync;
	na->nm_rxsync = &generic_netmap_rxsync;
	na->nm_dtor = &generic_netmap_dtor;
	/* when using generic, IFCAP_NETMAP is set so we force
	 * NAF_SKIP_INTR to use the regular interrupt handler
	 */
	na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS;

	ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
			ifp->num_tx_queues, ifp->real_num_tx_queues,
			ifp->tx_queue_len);
	ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
			ifp->num_rx_queues, ifp->real_num_rx_queues);

	generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);

	retval = netmap_attach_common(na);
	if (retval) {
		free(gna, M_DEVBUF);
	}

	return retval;
}
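/*
 * Userspace usage sketch (illustrative; "em0" is an example name,
 * and error handling plus the mmap and ring macros from
 * <net/netmap_user.h> are omitted). Registering an interface whose
 * driver lacks native netmap support ends up in
 * generic_netmap_attach():
 *
 *	struct nmreq req;
 *	int fd = open("/dev/netmap", O_RDWR);
 *
 *	bzero(&req, sizeof(req));
 *	req.nr_version = NETMAP_API;
 *	strncpy(req.nr_name, "em0", sizeof(req.nr_name) - 1);
 *	ioctl(fd, NIOCREGIF, &req);	// emulated mode on this NIC
 */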