netmap_generic.c revision 262152
/*
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This module implements netmap support on top of standard,
 * unmodified device drivers.
 *
 * A NIOCREGIF request is handled here if the device does not
 * have native support. TX and RX rings are emulated as follows:
 *
 * NIOCREGIF
 *	We preallocate a block of TX mbufs (roughly as many as
 *	tx descriptors; the number is not critical) to speed up
 *	operation during transmissions. The refcount on most of
 *	these buffers is artificially bumped up so we can recycle
 *	them more easily. Also, the destructor is intercepted
 *	so we use it as an interrupt notification to wake up
 *	processes blocked on a poll().
 *
 *	For each receive ring we allocate one "struct mbq"
 *	(an mbuf tailq plus a spinlock). We intercept packets on
 *	the receive path (through if_input) and put them in the
 *	mbq, from which the netmap receive routines can grab them.
 *
 * TX:
 *	In the generic_netmap_txsync() routine, netmap buffers are
 *	copied (or linked, in the future) to the preallocated mbufs
 *	and pushed to the transmit queue. Some of these mbufs
 *	(those with NS_REPORT, or otherwise every half ring)
 *	have refcount 1, the others have refcount 2.
 *	When the destructor is invoked, we take that as
 *	a notification that all mbufs up to that one in
 *	the specific ring have been completed, and generate
 *	the equivalent of a transmit interrupt.
 *
 * RX:
 *	In the generic_netmap_rxsync() routine, mbufs queued by the
 *	rx handler are dequeued, their payload is copied into the
 *	netmap receive ring, and the mbufs are then freed.
 */
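
/*
 * For reference, a minimal user-space sequence that exercises this
 * emulated path might look like the sketch below (illustrative only:
 * error handling is omitted, "foo0" is a hypothetical interface name,
 * and the request fields and ioctls are defined in net/netmap.h):
 *
 *	struct nmreq req;
 *	int fd = open("/dev/netmap", O_RDWR);
 *
 *	bzero(&req, sizeof(req));
 *	req.nr_version = NETMAP_API;
 *	strncpy(req.nr_name, "foo0", sizeof(req.nr_name));
 *	ioctl(fd, NIOCREGIF, &req);	// served by generic_netmap_register()
 *	char *mem = mmap(0, req.nr_memsize, PROT_READ | PROT_WRITE,
 *		MAP_SHARED, fd, 0);
 *	struct netmap_if *nifp = NETMAP_IF(mem, req.nr_offset);
 *	struct netmap_ring *txring = NETMAP_TXRING(nifp, 0);
 *	// ... fill tx slots, advance txring->head and txring->cur, then
 *	ioctl(fd, NIOCTXSYNC, NULL);	// ends up in generic_netmap_txsync()
 */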

#ifdef __FreeBSD__

#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD: stable/10/sys/dev/netmap/netmap_generic.c 262152 2014-02-18 05:46:19Z luigi $");

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/lock.h>   /* PROT_EXEC */
#include <sys/rwlock.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h>        /* bus_dmamap_* in netmap_kern.h */

// XXX temporary - D() defined here
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>

#define rtnl_lock() D("rtnl_lock called");
#define rtnl_unlock() D("rtnl_unlock called");
#define MBUF_TXQ(m)	((m)->m_pkthdr.flowid)
#define MBUF_RXQ(m)	((m)->m_pkthdr.flowid)
#define smp_mb()

/*
 * mbuf wrappers
 */

/*
 * we allocate an EXT_PACKET
 */
#define netmap_get_mbuf(len) m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR|M_NOFREE)
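/* Note: m_getcl() returns an mbuf with a cluster attached (EXT_PACKET).
 * The M_NOFREE flag should keep the mbuf itself from being returned to
 * its zone when the packet is released, so the mbuf/cluster pair can be
 * recycled across transmissions; the 'len' argument is currently unused.
 * (See generic_mbuf_destructor() below for how the buffer is eventually
 * handed back via uma_zfree(zone_pack, m).)
 */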

/* To install an mbuf destructor we also need to switch the external
 * storage type to EXT_EXTREF; the destructor then clears the flag
 * again and chains into uma_zfree(zone_pack, m)
 * (or reinstalls the buffer ?)
 */
#define SET_MBUF_DESTRUCTOR(m, fn)	do {		\
	(m)->m_ext.ext_free = (void *)fn;	\
	(m)->m_ext.ext_type = EXT_EXTREF;	\
} while (0)
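
/* SET_MBUF_DESTRUCTOR() is used by generic_set_tx_event() to mark one
 * in-flight mbuf per batch: when the driver finally frees that mbuf,
 * generic_mbuf_destructor() runs and is treated as a tx-completion
 * interrupt for all the earlier slots of the same ring.
 */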


#define GET_MBUF_REFCNT(m)	((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1)



#else /* linux */

#include "bsd_glue.h"

#include <linux/rtnetlink.h>    /* rtnl_[un]lock() */
#include <linux/ethtool.h>      /* struct ethtool_ops, get_ringparam */
#include <linux/hrtimer.h>

//#define RATE  /* Enables communication statistics. */

//#define REG_RESET

#endif /* linux */


/* Common headers. */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>


/* ======================== usage stats =========================== */

#ifdef RATE
#define IFRATE(x) x
struct rate_stats {
	unsigned long txpkt;
	unsigned long txsync;
	unsigned long txirq;
	unsigned long rxpkt;
	unsigned long rxirq;
	unsigned long rxsync;
};

struct rate_context {
	unsigned refcount;
	struct timer_list timer;
	struct rate_stats new;
	struct rate_stats old;
};

#define RATE_PRINTK(_NAME_) \
	printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD);
#define RATE_PERIOD  2
static void rate_callback(unsigned long arg)
{
	struct rate_context * ctx = (struct rate_context *)arg;
	struct rate_stats cur = ctx->new;
	int r;

	RATE_PRINTK(txpkt);
	RATE_PRINTK(txsync);
	RATE_PRINTK(txirq);
	RATE_PRINTK(rxpkt);
	RATE_PRINTK(rxsync);
	RATE_PRINTK(rxirq);
	printk("\n");

	ctx->old = cur;
	r = mod_timer(&ctx->timer, jiffies +
			msecs_to_jiffies(RATE_PERIOD * 1000));
	if (unlikely(r))
		D("[v1000] Error: mod_timer()");
}

static struct rate_context rate_ctx;

#else /* !RATE */
#define IFRATE(x)
#endif /* !RATE */


/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */
#define GENERIC_BUF_SIZE        netmap_buf_size    /* Size of the mbufs in the Tx pool. */

/*
 * Wrapper used by the generic adapter layer to notify
 * the poller threads. Unlike netmap_rx_irq(), we check
 * IFCAP_NETMAP rather than NAF_NATIVE_ON to decide whether
 * to deliver the notification.
 */
static void
netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
{
	if (unlikely(!(ifp->if_capenable & IFCAP_NETMAP)))
		return;

	netmap_common_irq(ifp, q, work_done);
}


/* Enable/disable netmap mode for a generic network interface. */
static int
generic_netmap_register(struct netmap_adapter *na, int enable)
{
	struct ifnet *ifp = na->ifp;
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
	struct mbuf *m;
	int error;
	int i, r;

	if (!na)
		return EINVAL;

#ifdef REG_RESET
	error = ifp->netdev_ops->ndo_stop(ifp);
	if (error) {
		return error;
	}
#endif /* REG_RESET */

	if (enable) { /* Enable netmap mode. */
		/* Init the mitigation support. */
		gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
					M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!gna->mit) {
			D("mitigation allocation failed");
			error = ENOMEM;
			goto out;
		}
		for (r=0; r<na->num_rx_rings; r++)
			netmap_mitigation_init(&gna->mit[r], na);

		/* Initialize the rx queue, as generic_rx_handler() can
		 * be called as soon as netmap_catch_rx() returns.
		 */
		for (r=0; r<na->num_rx_rings; r++) {
			mbq_safe_init(&na->rx_rings[r].rx_queue);
		}

		/*
		 * Preallocate packet buffers for the tx rings.
		 */
		for (r=0; r<na->num_tx_rings; r++)
			na->tx_rings[r].tx_pool = NULL;
		for (r=0; r<na->num_tx_rings; r++) {
			na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *),
					M_DEVBUF, M_NOWAIT | M_ZERO);
			if (!na->tx_rings[r].tx_pool) {
				D("tx_pool allocation failed");
				error = ENOMEM;
				goto free_tx_pools;
			}
			for (i=0; i<na->num_tx_desc; i++)
				na->tx_rings[r].tx_pool[i] = NULL;
			for (i=0; i<na->num_tx_desc; i++) {
				m = netmap_get_mbuf(GENERIC_BUF_SIZE);
				if (!m) {
					D("tx_pool[%d] allocation failed", i);
					error = ENOMEM;
					goto free_tx_pools;
				}
				na->tx_rings[r].tx_pool[i] = m;
			}
		}
		rtnl_lock();
		/* Prepare to intercept incoming traffic. */
		error = netmap_catch_rx(na, 1);
		if (error) {
			D("netdev_rx_handler_register() failed (%d)", error);
			goto register_handler;
		}
		ifp->if_capenable |= IFCAP_NETMAP;

		/* Make netmap control the packet steering. */
		netmap_catch_tx(gna, 1);

		rtnl_unlock();

#ifdef RATE
		if (rate_ctx.refcount == 0) {
			D("setup_timer()");
			memset(&rate_ctx, 0, sizeof(rate_ctx));
			setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
			if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
				D("Error: mod_timer()");
			}
		}
		rate_ctx.refcount++;
#endif /* RATE */

	} else if (na->tx_rings[0].tx_pool) {
		/* Disable netmap mode. We enter here only if the previous
		   generic_netmap_register(na, 1) was successful.
		   If it was not, na->tx_rings[0].tx_pool was set to NULL by the
		   error handling code below. */
		rtnl_lock();

		ifp->if_capenable &= ~IFCAP_NETMAP;

		/* Release packet steering control. */
		netmap_catch_tx(gna, 0);

		/* Do not intercept packets on the rx path. */
		netmap_catch_rx(na, 0);

		rtnl_unlock();

		/* Free the mbufs still pending in the rx queues. */
		for (r=0; r<na->num_rx_rings; r++) {
			mbq_safe_purge(&na->rx_rings[r].rx_queue);
			mbq_safe_destroy(&na->rx_rings[r].rx_queue);
		}

		for (r=0; r<na->num_rx_rings; r++)
			netmap_mitigation_cleanup(&gna->mit[r]);
		free(gna->mit, M_DEVBUF);

		for (r=0; r<na->num_tx_rings; r++) {
			for (i=0; i<na->num_tx_desc; i++) {
				m_freem(na->tx_rings[r].tx_pool[i]);
			}
			free(na->tx_rings[r].tx_pool, M_DEVBUF);
		}

#ifdef RATE
		if (--rate_ctx.refcount == 0) {
			D("del_timer()");
			del_timer(&rate_ctx.timer);
		}
#endif
	}

#ifdef REG_RESET
	error = ifp->netdev_ops->ndo_open(ifp);
	if (error) {
		goto free_tx_pools;
	}
#endif

	return 0;

register_handler:
	rtnl_unlock();
free_tx_pools:
	for (r=0; r<na->num_tx_rings; r++) {
		if (na->tx_rings[r].tx_pool == NULL)
			continue;
		for (i=0; i<na->num_tx_desc; i++)
			if (na->tx_rings[r].tx_pool[i])
				m_freem(na->tx_rings[r].tx_pool[i]);
		free(na->tx_rings[r].tx_pool, M_DEVBUF);
		na->tx_rings[r].tx_pool = NULL;
	}
	for (r=0; r<na->num_rx_rings; r++) {
		netmap_mitigation_cleanup(&gna->mit[r]);
		mbq_safe_destroy(&na->rx_rings[r].rx_queue);
	}
	free(gna->mit, M_DEVBUF);
out:

	return error;
}

/*
 * Callback invoked when the device driver frees an mbuf used
 * by netmap to transmit a packet. This usually happens when
 * the NIC notifies the driver that transmission is completed.
 */
static void
generic_mbuf_destructor(struct mbuf *m)
{
	if (netmap_verbose)
		D("Tx irq (%p) queue %d", m, MBUF_TXQ(m));
	netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
#ifdef __FreeBSD__
	m->m_ext.ext_type = EXT_PACKET;
	m->m_ext.ext_free = NULL;
	if (*(m->m_ext.ref_cnt) == 0)
		*(m->m_ext.ref_cnt) = 1;
	uma_zfree(zone_pack, m);
#endif /* __FreeBSD__ */
	IFRATE(rate_ctx.new.txirq++);
}

/* Record completed transmissions and update hwtail.
 *
 * The oldest tx buffer not yet completed is at nr_hwtail + 1,
 * nr_hwcur is the first unsent buffer.
 */
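/* Normally returns the number of reclaimed slots; it can also return
 * -ENOMEM if a recycled mbuf cannot be replenished. The caller,
 * generic_netmap_txsync(), uses a non-zero return after a failed
 * transmission to decide whether it is worth retrying.
 */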
static u_int
generic_netmap_tx_clean(struct netmap_kring *kring)
{
	u_int const lim = kring->nkr_num_slots - 1;
	u_int nm_i = nm_next(kring->nr_hwtail, lim);
	u_int hwcur = kring->nr_hwcur;
	u_int n = 0;
	struct mbuf **tx_pool = kring->tx_pool;

	while (nm_i != hwcur) { /* buffers not completed */
		struct mbuf *m = tx_pool[nm_i];

		if (unlikely(m == NULL)) {
			/* this is done, try to replenish the entry */
			tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
			if (unlikely(m == NULL)) {
				D("mbuf allocation failed, XXX error");
				// XXX how do we proceed ? break ?
				return -ENOMEM;
			}
		} else if (GET_MBUF_REFCNT(m) != 1) {
			break; /* This mbuf is still busy: its refcnt is 2. */
		}
		n++;
		nm_i = nm_next(nm_i, lim);
	}
	kring->nr_hwtail = nm_prev(nm_i, lim);
	ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);

	return n;
}


/*
 * We have pending packets in the driver between nr_hwtail + 1 and hwcur.
 * Compute a position in the middle, to be used to generate
 * a notification.
 */
static inline u_int
generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
{
	u_int n = kring->nkr_num_slots;
	u_int ntc = nm_next(kring->nr_hwtail, n-1);
	u_int e;

	if (hwcur >= ntc) {
		e = (hwcur + ntc) / 2;
	} else { /* wrap around */
		e = (hwcur + n + ntc) / 2;
		if (e >= n) {
			e -= n;
		}
	}

	if (unlikely(e >= n)) {
		D("This cannot happen");
		e = 0;
	}

	return e;
}
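
/* Worked example (illustrative numbers): with 256 slots, nr_hwtail = 10 and
 * hwcur = 200, ntc is 11 and the event slot is (200 + 11) / 2 = 105, roughly
 * halfway through the pending region. In the wrap-around case, e.g.
 * nr_hwtail = 200 and hwcur = 50, ntc is 201 and
 * e = (50 + 256 + 201) / 2 = 253, again in the middle of the 201..49 span.
 */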

/*
 * We have pending packets in the driver between nr_hwtail+1 and hwcur.
 * Schedule a notification approximately in the middle of the two.
 * There is a race, but this is only called within txsync, which
 * does a double check.
 */
static void
generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
{
	struct mbuf *m;
	u_int e;

	if (nm_next(kring->nr_hwtail, kring->nkr_num_slots -1) == hwcur) {
		return; /* all buffers are free */
	}
	e = generic_tx_event_middle(kring, hwcur);

	m = kring->tx_pool[e];
	if (m == NULL) {
		/* This can happen if there is already an event on the netmap
		   slot 'e': There is nothing to do. */
		return;
	}
	ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
	kring->tx_pool[e] = NULL;
	SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);

	// XXX wmb() ?
	/* Decrement the refcount and free it if we have the last one. */
	m_freem(m);
	smp_mb();
}
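
/* When the driver eventually drops its (last) reference to the marked mbuf,
 * generic_mbuf_destructor() runs: it calls netmap_generic_irq(), which wakes
 * up processes sleeping in poll() on this ring, emulating a transmit
 * completion interrupt.
 */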


/*
 * generic_netmap_txsync() transforms netmap buffers into mbufs
 * and passes them to the standard device driver
 * (ndo_start_xmit() or ifp->if_transmit()).
 * On Linux this is not done directly; instead we use dev_queue_xmit(),
 * since it implements TX flow control (and takes some locks).
 */
static int
generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct ifnet *ifp = na->ifp;
	struct netmap_kring *kring = &na->tx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */ // j
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;

	IFRATE(rate_ctx.new.txsync++);

	// TODO: handle the case of mbuf allocation failure

	rmb();

	/*
	 * First part: process new packets to send.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {	/* we have new packets to send */
		while (nm_i != head) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			void *addr = NMB(slot);

			/* device-specific */
			struct mbuf *m;
			int tx_ret;

			NM_CHECK_ADDR_LEN(addr, len);

			/* Take an mbuf from the tx pool and copy in the user packet. */
			m = kring->tx_pool[nm_i];
			if (unlikely(!m)) {
				RD(5, "This should never happen");
				kring->tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
				if (unlikely(m == NULL)) {
					D("mbuf allocation failed");
					break;
				}
			}
			/* XXX we should ask for notifications when NS_REPORT is set,
			 * or roughly every half ring. We can optimize this
			 * by lazily requesting notifications only when a
			 * transmission fails. Probably the best way is to
			 * break on failures and set notifications when
			 * ring->cur == ring->tail || nm_i != cur
			 */
			tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
			if (unlikely(tx_ret)) {
				RD(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]",
						tx_ret, nm_i, head, kring->nr_hwtail);
				/*
				 * No room for this mbuf in the device driver.
				 * Request a notification FOR A PREVIOUS MBUF,
				 * then call generic_netmap_tx_clean(kring) to do the
				 * double check and see if we can free more buffers.
				 * If there is space continue, else break;
				 * NOTE: the double check is necessary if the problem
				 * occurs in the txsync call after selrecord().
				 * Also, we need some way to tell the caller that not
				 * all buffers were queued onto the device (this was
				 * not a problem with a native netmap driver where space
				 * is preallocated). The bridge has a similar problem
				 * and we solve it there by dropping the excess packets.
				 */
				generic_set_tx_event(kring, nm_i);
				if (generic_netmap_tx_clean(kring)) { /* space now available */
					continue;
				} else {
					break;
				}
			}
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
			nm_i = nm_next(nm_i, lim);
			IFRATE(rate_ctx.new.txpkt++);
		}

		/* Update hwcur to the next slot to transmit. */
		kring->nr_hwcur = nm_i; /* not head, we could break early */
	}

	/*
	 * Second, reclaim completed buffers
	 */
	if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
		/* No more available slots? Set a notification event
		 * on a netmap slot that will be cleaned in the future.
		 * No doublecheck is performed, since txsync() will be
		 * called twice by netmap_poll().
		 */
		generic_set_tx_event(kring, nm_i);
	}
	ND("tx #%d, hwtail = %d", n, kring->nr_hwtail);

	generic_netmap_tx_clean(kring);

	nm_txsync_finalize(kring);

	return 0;
}


/*
 * This handler is registered (through netmap_catch_rx())
 * within the attached network interface
 * in the RX subsystem, so that every mbuf passed up by
 * the driver can be stolen before it reaches the network stack.
 * Stolen packets are put in a queue where the
 * generic_netmap_rxsync() callback can extract them.
 */
void
generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
	u_int work_done;
	u_int rr = MBUF_RXQ(m); // receive ring number

	if (rr >= na->num_rx_rings) {
		rr = rr % na->num_rx_rings; // XXX expensive...
	}

	/* limit the size of the queue */
	if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
		m_freem(m);
	} else {
		mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
	}

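	/* netmap_generic_mit is a global tunable (defined elsewhere, see
	 * netmap.c): values below 32768 disable RX mitigation, so every
	 * packet generates a notification; larger values coalesce
	 * notifications using the per-ring state in gna->mit[rr].
	 */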
	if (netmap_generic_mit < 32768) {
		/* no rx mitigation, pass notification up */
		netmap_generic_irq(na->ifp, rr, &work_done);
		IFRATE(rate_ctx.new.rxirq++);
	} else {
		/* same as send combining, filter notification if there is a
		 * pending timer, otherwise pass it up and start a timer.
		 */
		if (likely(netmap_mitigation_active(&gna->mit[rr]))) {
			/* Record that there is some pending work. */
			gna->mit[rr].mit_pending = 1;
		} else {
			netmap_generic_irq(na->ifp, rr, &work_done);
			IFRATE(rate_ctx.new.rxirq++);
			netmap_mitigation_start(&gna->mit[rr]);
		}
	}
}

/*
 * generic_netmap_rxsync() extracts mbufs from the queue filled by
 * generic_rx_handler() and puts their content in the netmap
 * receive ring.
 * Access must be protected because the rx handler is asynchronous.
 */
static int
generic_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */ //j,
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = nm_rxsync_prologue(kring);
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

	if (head > lim)
		return netmap_ring_reinit(kring);

	/*
	 * First part: import newly received packets.
	 */
	if (netmap_no_pendintr || force_update) {
		/* extract buffers from the rx queue, stop at most one
		 * slot before nr_hwcur (stop_i)
		 */
		uint16_t slot_flags = kring->nkr_slot_flags;
		u_int stop_i = nm_prev(kring->nr_hwcur, lim);

		nm_i = kring->nr_hwtail; /* first empty slot in the receive ring */
		for (n = 0; nm_i != stop_i; n++) {
			int len;
			void *addr = NMB(&ring->slot[nm_i]);
			struct mbuf *m;

			/* we only check the address here on generic rx rings */
			if (addr == netmap_buffer_base) { /* Bad buffer */
				return netmap_ring_reinit(kring);
			}
			/*
			 * Call the locked version of the function.
			 * XXX Ideally we could grab a batch of mbufs at once
			 * and save some locking overhead.
			 */
			m = mbq_safe_dequeue(&kring->rx_queue);
			if (!m)	/* no more data */
				break;
			len = MBUF_LEN(m);
			m_copydata(m, 0, len, addr);
			ring->slot[nm_i].len = len;
			ring->slot[nm_i].flags = slot_flags;
			m_freem(m);
			nm_i = nm_next(nm_i, lim);
		}
		if (n) {
			kring->nr_hwtail = nm_i;
			IFRATE(rate_ctx.new.rxpkt += n);
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	// XXX should we invert the order ?
	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		/* Userspace has released some packets. */
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];

			slot->flags &= ~NS_BUF_CHANGED;
			nm_i = nm_next(nm_i, lim);
		}
		kring->nr_hwcur = head;
	}
	/* tell userspace that there might be new packets. */
	nm_rxsync_finalize(kring);
	IFRATE(rate_ctx.new.rxsync++);

	return 0;
}

static void
generic_netmap_dtor(struct netmap_adapter *na)
{
	struct ifnet *ifp = na->ifp;
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
	struct netmap_adapter *prev_na = gna->prev;

	if (prev_na != NULL) {
		D("Released generic NA %p", gna);
		if_rele(na->ifp);
		netmap_adapter_put(prev_na);
	}
	if (ifp != NULL) {
		WNA(ifp) = prev_na;
		D("Restored native NA %p", prev_na);
		na->ifp = NULL;
	}
}

/*
 * generic_netmap_attach() makes it possible to use netmap on
 * a device without native netmap support.
 * This is slower than native support, but potentially
 * faster than raw sockets or similar schemes.
 *
 * In this "emulated" mode, netmap rings do not necessarily
 * have the same size as those in the NIC. We use a default
 * value and possibly override it if the OS has ways to fetch the
 * actual configuration.
 */
int
generic_netmap_attach(struct ifnet *ifp)
{
	struct netmap_adapter *na;
	struct netmap_generic_adapter *gna;
	int retval;
	u_int num_tx_desc, num_rx_desc;

	num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */

	generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc);
	ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);

	gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (gna == NULL) {
		D("no memory on attach, give up");
		return ENOMEM;
	}
	na = (struct netmap_adapter *)gna;
	na->ifp = ifp;
	na->num_tx_desc = num_tx_desc;
	na->num_rx_desc = num_rx_desc;
	na->nm_register = &generic_netmap_register;
	na->nm_txsync = &generic_netmap_txsync;
	na->nm_rxsync = &generic_netmap_rxsync;
	na->nm_dtor = &generic_netmap_dtor;
	/* when using generic, IFCAP_NETMAP is set so we force
	 * NAF_SKIP_INTR to use the regular interrupt handler
	 */
	na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS;

	ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
			ifp->num_tx_queues, ifp->real_num_tx_queues,
			ifp->tx_queue_len);
	ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
			ifp->num_rx_queues, ifp->real_num_rx_queues);

	generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);

	retval = netmap_attach_common(na);
	if (retval) {
		free(gna, M_DEVBUF);
	}

	return retval;
}