/*
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This module implements netmap support on top of standard,
 * unmodified device drivers.
 *
 * A NIOCREGIF request is handled here if the device does not
 * have native support. TX and RX rings are emulated as follows:
 *
 * NIOCREGIF
 *	We preallocate a block of TX mbufs (roughly as many as
 *	tx descriptors; the exact number is not critical) to speed up
 *	operation during transmissions. The refcount on most of
 *	these buffers is artificially bumped up so we can recycle
 *	them more easily. Also, the destructor is intercepted
 *	so we can use it as an interrupt notification to wake up
 *	processes blocked on a poll().
 *
 *	For each receive ring we allocate one "struct mbq"
 *	(an mbuf tailq plus a spinlock). We intercept packets
 *	(through if_input) on the receive path and put them in
 *	the mbq, from which the netmap receive routines can grab
 *	them.
 *
 * TX:
 *	in the generic_netmap_txsync() routine, netmap buffers are copied
 *	(or linked, in the future) to the preallocated mbufs
 *	and pushed to the transmit queue. Some of these mbufs
 *	(those with NS_REPORT set, or otherwise one every half ring)
 *	have refcount = 1, the others have refcount = 2.
 *	When the destructor is invoked, we take that as
 *	a notification that all mbufs up to that one in
 *	the specific ring have been completed, and generate
 *	the equivalent of a transmit interrupt.
 *
 * RX:
 *	intercepted mbufs are queued into the per-ring "struct mbq";
 *	generic_netmap_rxsync() then dequeues them, copies their
 *	payload into the netmap buffers and advances the ring.
 */
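
/*
 * A rough picture of the emulated datapath (an illustrative sketch;
 * see the per-function comments below for the details):
 *
 *   TX: netmap ring --copy--> tx_pool mbuf --> driver transmit routine
 *           ^                                          |
 *           +---- mbuf destructor acts as a tx irq <---+
 *
 *   RX: driver input path --> per-ring mbq --copy--> netmap ring
 */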

#ifdef __FreeBSD__

#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/lock.h>   /* PROT_EXEC */
#include <sys/rwlock.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h>        /* bus_dmamap_* in netmap_kern.h */

// XXX temporary - D() defined here
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
/* no trailing semicolons in the macro bodies: they would expand
 * to an extra empty statement at every call site */
#define rtnl_lock()	D("rtnl_lock called")
#define rtnl_unlock()	D("rtnl_unlock called")
#define MBUF_TXQ(m)	((m)->m_pkthdr.flowid)
#define MBUF_RXQ(m)	((m)->m_pkthdr.flowid)
#define smp_mb()

/*
 * mbuf wrappers
 */

/*
 * We allocate an EXT_PACKET mbuf: m_getcl() returns an mbuf with a
 * 2K cluster attached (note that the len argument is unused here).
 */
#define netmap_get_mbuf(len) m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR|M_NOFREE)

/* Install the mbuf destructor: we also need to change the type to
 * EXT_EXTREF and add an M_NOFREE flag; the destructor then clears
 * the flag and chains the mbuf back into uma_zfree(zone_pack, m)
 * (or reinstall the buffer ?)
 */
#define SET_MBUF_DESTRUCTOR(m, fn)	do {		\
	(m)->m_ext.ext_free = (void *)fn;	\
	(m)->m_ext.ext_type = EXT_EXTREF;	\
} while (0)
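
/*
 * SET_MBUF_DESTRUCTOR() is used by generic_set_tx_event() below to
 * turn the final free of a chosen tx mbuf into a tx-completion
 * notification (see generic_mbuf_destructor()).
 */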

#define GET_MBUF_REFCNT(m)	((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1)


#else /* linux */

#include "bsd_glue.h"

#include <linux/rtnetlink.h>    /* rtnl_[un]lock() */
#include <linux/ethtool.h>      /* struct ethtool_ops, get_ringparam */
#include <linux/hrtimer.h>

//#define RATE  /* Enables communication statistics. */

//#define REG_RESET

#endif /* linux */


/* Common headers. */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>

/* ======================== usage stats =========================== */

#ifdef RATE
#define IFRATE(x) x
struct rate_stats {
	unsigned long txpkt;
	unsigned long txsync;
	unsigned long txirq;
	unsigned long rxpkt;
	unsigned long rxirq;
	unsigned long rxsync;
};

struct rate_context {
	unsigned refcount;
	struct timer_list timer;
	struct rate_stats new;
	struct rate_stats old;
};
#define RATE_PRINTK(_NAME_) \
	printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD)
#define RATE_PERIOD  2
static void rate_callback(unsigned long arg)
{
	struct rate_context * ctx = (struct rate_context *)arg;
	struct rate_stats cur = ctx->new;
	int r;

	RATE_PRINTK(txpkt);
	RATE_PRINTK(txsync);
	RATE_PRINTK(txirq);
	RATE_PRINTK(rxpkt);
	RATE_PRINTK(rxsync);
	RATE_PRINTK(rxirq);
	printk("\n");

	ctx->old = cur;
	r = mod_timer(&ctx->timer, jiffies +
			msecs_to_jiffies(RATE_PERIOD * 1000));
	if (unlikely(r))
		D("[v1000] Error: mod_timer()");
}

static struct rate_context rate_ctx;

#else /* !RATE */
#define IFRATE(x)
#endif /* !RATE */

/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */
#define GENERIC_BUF_SIZE        netmap_buf_size    /* Size of the mbufs in the Tx pool. */

/*
 * Wrapper used by the generic adapter layer to notify
 * the poller threads. Unlike netmap_rx_irq(), we check
 * only IFCAP_NETMAP instead of NAF_NATIVE_ON before
 * delivering the notification.
 */
static void
netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
{
	if (unlikely(!(ifp->if_capenable & IFCAP_NETMAP)))
		return;

	netmap_common_irq(ifp, q, work_done);
}


/* Enable/disable netmap mode for a generic network interface. */
static int
generic_netmap_register(struct netmap_adapter *na, int enable)
{
	struct ifnet *ifp;
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
	struct mbuf *m;
	int error;
	int i, r;

	if (!na)
		return EINVAL;
	ifp = na->ifp;	/* dereference na only after the NULL check */

#ifdef REG_RESET
	error = ifp->netdev_ops->ndo_stop(ifp);
	if (error) {
		return error;
	}
#endif /* REG_RESET */

	if (enable) { /* Enable netmap mode. */
		/* Init the mitigation support. */
		gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
					M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!gna->mit) {
			D("mitigation allocation failed");
			error = ENOMEM;
			goto out;
		}
		for (r=0; r<na->num_rx_rings; r++)
			netmap_mitigation_init(&gna->mit[r], na);

		/* Initialize the rx queue, as generic_rx_handler() can
		 * be called as soon as netmap_catch_rx() returns.
		 */
		for (r=0; r<na->num_rx_rings; r++) {
			mbq_safe_init(&na->rx_rings[r].rx_queue);
		}

		/*
		 * Preallocate packet buffers for the tx rings.
		 */
		for (r=0; r<na->num_tx_rings; r++)
			na->tx_rings[r].tx_pool = NULL;
		for (r=0; r<na->num_tx_rings; r++) {
			na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *),
					M_DEVBUF, M_NOWAIT | M_ZERO);
			if (!na->tx_rings[r].tx_pool) {
				D("tx_pool allocation failed");
				error = ENOMEM;
				goto free_tx_pools;
			}
			for (i=0; i<na->num_tx_desc; i++)
				na->tx_rings[r].tx_pool[i] = NULL;
			for (i=0; i<na->num_tx_desc; i++) {
				m = netmap_get_mbuf(GENERIC_BUF_SIZE);
				if (!m) {
					D("tx_pool[%d] allocation failed", i);
					error = ENOMEM;
					goto free_tx_pools;
				}
				na->tx_rings[r].tx_pool[i] = m;
			}
		}
		rtnl_lock();
		/* Prepare to intercept incoming traffic. */
		error = netmap_catch_rx(na, 1);
		if (error) {
			D("netdev_rx_handler_register() failed (%d)", error);
			goto register_handler;
		}
		ifp->if_capenable |= IFCAP_NETMAP;

		/* Make netmap control the packet steering. */
		netmap_catch_tx(gna, 1);

		rtnl_unlock();

#ifdef RATE
		if (rate_ctx.refcount == 0) {
			D("setup_timer()");
			memset(&rate_ctx, 0, sizeof(rate_ctx));
			setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
			if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
				D("Error: mod_timer()");
			}
		}
		rate_ctx.refcount++;
#endif /* RATE */

	} else if (na->tx_rings[0].tx_pool) {
		/* Disable netmap mode. We enter here only if the previous
		 * generic_netmap_register(na, 1) was successful. If it was
		 * not, na->tx_rings[0].tx_pool was set to NULL by the error
		 * handling code below. */
		rtnl_lock();

		ifp->if_capenable &= ~IFCAP_NETMAP;

		/* Release packet steering control. */
		netmap_catch_tx(gna, 0);

		/* Do not intercept packets on the rx path. */
		netmap_catch_rx(na, 0);

		rtnl_unlock();

		/* Free the mbufs going to the netmap rings */
		for (r=0; r<na->num_rx_rings; r++) {
			mbq_safe_purge(&na->rx_rings[r].rx_queue);
			mbq_safe_destroy(&na->rx_rings[r].rx_queue);
		}

		for (r=0; r<na->num_rx_rings; r++)
			netmap_mitigation_cleanup(&gna->mit[r]);
		free(gna->mit, M_DEVBUF);

		for (r=0; r<na->num_tx_rings; r++) {
			for (i=0; i<na->num_tx_desc; i++) {
				m_freem(na->tx_rings[r].tx_pool[i]);
			}
			free(na->tx_rings[r].tx_pool, M_DEVBUF);
		}

#ifdef RATE
		if (--rate_ctx.refcount == 0) {
			D("del_timer()");
			del_timer(&rate_ctx.timer);
		}
#endif
	}

#ifdef REG_RESET
	error = ifp->netdev_ops->ndo_open(ifp);
	if (error) {
		goto free_tx_pools;
	}
#endif

	return 0;

register_handler:
	rtnl_unlock();
free_tx_pools:
	for (r=0; r<na->num_tx_rings; r++) {
		if (na->tx_rings[r].tx_pool == NULL)
			continue;
		for (i=0; i<na->num_tx_desc; i++)
			if (na->tx_rings[r].tx_pool[i])
				m_freem(na->tx_rings[r].tx_pool[i]);
		free(na->tx_rings[r].tx_pool, M_DEVBUF);
		na->tx_rings[r].tx_pool = NULL;
	}
	for (r=0; r<na->num_rx_rings; r++) {
		netmap_mitigation_cleanup(&gna->mit[r]);
		mbq_safe_destroy(&na->rx_rings[r].rx_queue);
	}
	free(gna->mit, M_DEVBUF);
out:

	return error;
}


/*
 * Callback invoked when the device driver frees an mbuf used
 * by netmap to transmit a packet. This usually happens when
 * the NIC notifies the driver that transmission is completed.
 */
static void
generic_mbuf_destructor(struct mbuf *m)
{
	if (netmap_verbose)
		D("Tx irq (%p) queue %d", m, MBUF_TXQ(m));
	netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
#ifdef __FreeBSD__
	m->m_ext.ext_type = EXT_PACKET;
	m->m_ext.ext_free = NULL;
	if (*(m->m_ext.ref_cnt) == 0)
		*(m->m_ext.ref_cnt) = 1;
	uma_zfree(zone_pack, m);
#endif /* __FreeBSD__ */
	IFRATE(rate_ctx.new.txirq++);
}

/* Record completed transmissions and update hwtail.
 *
 * The oldest tx buffer not yet completed is at nr_hwtail + 1,
 * nr_hwcur is the first unsent buffer.
 */
static u_int
generic_netmap_tx_clean(struct netmap_kring *kring)
{
	u_int const lim = kring->nkr_num_slots - 1;
	u_int nm_i = nm_next(kring->nr_hwtail, lim);
	u_int hwcur = kring->nr_hwcur;
	u_int n = 0;
	struct mbuf **tx_pool = kring->tx_pool;

	while (nm_i != hwcur) { /* buffers not completed */
		struct mbuf *m = tx_pool[nm_i];

		if (unlikely(m == NULL)) {
			/* this is done, try to replenish the entry */
			tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
			if (unlikely(m == NULL)) {
				D("mbuf allocation failed, XXX error");
				// XXX how do we proceed ? break ?
				return -ENOMEM;
			}
		} else if (GET_MBUF_REFCNT(m) != 1) {
			break; /* This mbuf is still busy: its refcnt is 2. */
		}
		n++;
		nm_i = nm_next(nm_i, lim);
	}
	kring->nr_hwtail = nm_prev(nm_i, lim);
	ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);

	return n;
}
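
/*
 * Worked example for generic_netmap_tx_clean() (illustrative numbers):
 * with lim = 255, nr_hwtail = 10 and nr_hwcur = 20, slots 11..19 are
 * scanned. If the mbuf in slot 14 still has refcount 2, the scan stops
 * there, n = 3 entries (slots 11..13) are reclaimed, and nr_hwtail
 * becomes 13.
 */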

/*
 * We have pending packets in the driver between nr_hwtail + 1 and hwcur.
 * Compute a position in the middle, to be used to generate
 * a notification.
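 *
 * Worked example (illustrative numbers): with nkr_num_slots = 256,
 * nr_hwtail = 200 and hwcur = 40, we get ntc = 201 > hwcur, so the
 * wrap-around branch yields e = (40 + 256 + 201) / 2 = 248, i.e.
 * halfway between slot 201 and slot 40 modulo 256.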
 */
static inline u_int
generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
{
	u_int n = kring->nkr_num_slots;
	u_int ntc = nm_next(kring->nr_hwtail, n - 1);
	u_int e;

	if (hwcur >= ntc) {
		e = (hwcur + ntc) / 2;
	} else { /* wrap around */
		e = (hwcur + n + ntc) / 2;
		if (e >= n) {
			e -= n;
		}
	}

	if (unlikely(e >= n)) {
		D("This cannot happen");
		e = 0;
	}

	return e;
}

/*
 * We have pending packets in the driver between nr_hwtail + 1 and hwcur.
 * Schedule a notification approximately in the middle of the two.
 * There is a race but this is only called within txsync which does
 * a double check.
 */
static void
generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
{
	struct mbuf *m;
	u_int e;

	if (nm_next(kring->nr_hwtail, kring->nkr_num_slots - 1) == hwcur) {
		return; /* all buffers are free */
	}
	e = generic_tx_event_middle(kring, hwcur);

	m = kring->tx_pool[e];
	if (m == NULL) {
		/* This can happen if there is already an event on the netmap
		   slot 'e': there is nothing to do. */
		return;
	}
	ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
	kring->tx_pool[e] = NULL;
	SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);

	// XXX wmb() ?
	/* Decrement the refcount and free it if we have the last one. */
	m_freem(m);
	smp_mb();
}
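
/*
 * Lifecycle of the event mbuf above (a sketch of the refcount trick):
 *
 *   refcount == 2    in flight: one reference held by the driver,
 *                    one by tx_pool[]
 *   m_freem() here   drops the tx_pool reference (2 -> 1)
 *   driver free      drops the last reference, which invokes
 *                    generic_mbuf_destructor() and wakes up pollers
 */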

/*
 * generic_netmap_txsync() transforms netmap buffers into mbufs
 * and passes them to the standard device driver
 * (ndo_start_xmit() or ifp->if_transmit() ).
 * On linux this is not done directly, but using dev_queue_xmit(),
 * since it implements the TX flow control (and takes some locks).
 */
static int
generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct ifnet *ifp = na->ifp;
	struct netmap_kring *kring = &na->tx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;

	IFRATE(rate_ctx.new.txsync++);

	// TODO: handle the case of mbuf allocation failure

	rmb();

	/*
	 * First part: process new packets to send.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {	/* we have new packets to send */
		while (nm_i != head) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			void *addr = NMB(slot);

			/* device-specific */
			struct mbuf *m;
			int tx_ret;

			NM_CHECK_ADDR_LEN(addr, len);

			/* Take an mbuf from the tx pool and copy in the user packet. */
			m = kring->tx_pool[nm_i];
			if (unlikely(!m)) {
				RD(5, "This should never happen");
				kring->tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
				if (unlikely(m == NULL)) {
					D("mbuf allocation failed");
					break;
				}
			}
			/* XXX we should ask for notifications when NS_REPORT is set,
			 * or roughly every half frame. We can optimize this
			 * by lazily requesting notifications only when a
			 * transmission fails. Probably the best way is to
			 * break on failures and set notifications when
			 * ring->cur == ring->tail || nm_i != cur
			 */
			tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
			if (unlikely(tx_ret)) {
				RD(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]",
						tx_ret, nm_i, head, kring->nr_hwtail);
				/*
				 * No room for this mbuf in the device driver.
				 * Request a notification FOR A PREVIOUS MBUF,
				 * then call generic_netmap_tx_clean(kring) to do the
				 * double check and see if we can free more buffers.
				 * If there is space continue, else break;
				 * NOTE: the double check is necessary if the problem
				 * occurs in the txsync call after selrecord().
				 * Also, we need some way to tell the caller that not
				 * all buffers were queued onto the device (this was
				 * not a problem with the native netmap driver where space
				 * is preallocated). The bridge has a similar problem
				 * and we solve it there by dropping the excess packets.
				 */
				generic_set_tx_event(kring, nm_i);
				if (generic_netmap_tx_clean(kring)) { /* space now available */
					continue;
				} else {
					break;
				}
			}
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
			nm_i = nm_next(nm_i, lim);
			IFRATE(rate_ctx.new.txpkt++);
		}

		/* Update hwcur to the next slot to transmit. */
		kring->nr_hwcur = nm_i; /* not head, we could break early */
	}

	/*
	 * Second, reclaim completed buffers
	 */
	if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
		/* No more available slots? Set a notification event
		 * on a netmap slot that will be cleaned in the future.
		 * No doublecheck is performed, since txsync() will be
		 * called twice by netmap_poll().
		 */
		generic_set_tx_event(kring, nm_i);
	}
	ND("tx: hwtail = %d", kring->nr_hwtail);

	generic_netmap_tx_clean(kring);

	nm_txsync_finalize(kring);

	return 0;
}


/*
 * This handler is registered (through netmap_catch_rx())
 * within the attached network interface
 * in the RX subsystem, so that every mbuf passed up by
 * the driver can be stolen before it reaches the network stack.
 * Stolen packets are put in a queue where the
 * generic_netmap_rxsync() callback can extract them.
 */
void
generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
	u_int work_done;
	u_int rr = MBUF_RXQ(m); // receive ring number

	if (rr >= na->num_rx_rings) {
		rr = rr % na->num_rx_rings; // XXX expensive...
	}

	/* limit the size of the queue */
	if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
		m_freem(m);
	} else {
		mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
	}

	if (netmap_generic_mit < 32768) {
		/* no rx mitigation, pass notification up */
		netmap_generic_irq(na->ifp, rr, &work_done);
		IFRATE(rate_ctx.new.rxirq++);
	} else {
		/* same as send combining, filter notification if there is a
		 * pending timer, otherwise pass it up and start a timer.
		 */
		if (likely(netmap_mitigation_active(&gna->mit[rr]))) {
			/* Record that there is some pending work. */
			gna->mit[rr].mit_pending = 1;
		} else {
			netmap_generic_irq(na->ifp, rr, &work_done);
			IFRATE(rate_ctx.new.rxirq++);
			netmap_mitigation_start(&gna->mit[rr]);
		}
	}
}
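
/*
 * Example of the mitigation logic above (illustrative): the first
 * packet on ring rr delivers a notification and arms the per-ring
 * timer; packets arriving while the timer is pending only set
 * mit_pending, so at most one notification is delivered through
 * this path per mitigation period.
 */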

/*
 * generic_netmap_rxsync() extracts mbufs from the queue filled by
 * generic_rx_handler() and puts their content in the netmap
 * receive ring.
 * Access must be protected because the rx handler is asynchronous.
 */
static int
generic_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = nm_rxsync_prologue(kring);
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

	if (head > lim)
		return netmap_ring_reinit(kring);

	/*
	 * First part: import newly received packets.
	 */
	if (netmap_no_pendintr || force_update) {
		/* extract buffers from the rx queue, stop at most one
		 * slot before nr_hwcur (stop_i)
		 */
		uint16_t slot_flags = kring->nkr_slot_flags;
		u_int stop_i = nm_prev(kring->nr_hwcur, lim);

		nm_i = kring->nr_hwtail; /* first empty slot in the receive ring */
		for (n = 0; nm_i != stop_i; n++) {
			int len;
			void *addr = NMB(&ring->slot[nm_i]);
			struct mbuf *m;

			/* we only check the address here on generic rx rings */
			if (addr == netmap_buffer_base) { /* Bad buffer */
				return netmap_ring_reinit(kring);
			}
			/*
			 * Call the locked version of the function.
			 * XXX Ideally we could grab a batch of mbufs at once
			 * and save some locking overhead.
			 */
			m = mbq_safe_dequeue(&kring->rx_queue);
			if (!m)	/* no more data */
				break;
			len = MBUF_LEN(m);
			m_copydata(m, 0, len, addr);
			ring->slot[nm_i].len = len;
			ring->slot[nm_i].flags = slot_flags;
			m_freem(m);
			nm_i = nm_next(nm_i, lim);
		}
		if (n) {
			kring->nr_hwtail = nm_i;
			IFRATE(rate_ctx.new.rxpkt += n);
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	// XXX should we invert the order ?
	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		/* Userspace has released some packets. */
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];

			slot->flags &= ~NS_BUF_CHANGED;
			nm_i = nm_next(nm_i, lim);
		}
		kring->nr_hwcur = head;
	}
	/* tell userspace that there might be new packets. */
	nm_rxsync_finalize(kring);
	IFRATE(rate_ctx.new.rxsync++);

	return 0;
}

static void
generic_netmap_dtor(struct netmap_adapter *na)
{
	struct ifnet *ifp = na->ifp;
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
	struct netmap_adapter *prev_na = gna->prev;

	if (prev_na != NULL) {
		D("Released generic NA %p", gna);
		if_rele(na->ifp);
		netmap_adapter_put(prev_na);
	}
	if (ifp != NULL) {
		WNA(ifp) = prev_na;
		D("Restored native NA %p", prev_na);
		na->ifp = NULL;
	}
}

/*
 * generic_netmap_attach() makes it possible to use netmap on
 * a device without native netmap support.
 * This is less performant than native support but potentially
 * faster than raw sockets or similar schemes.
 *
 * In this "emulated" mode, netmap rings do not necessarily
 * have the same size as those in the NIC. We use a default
 * value and possibly override it if the OS has ways to fetch the
 * actual configuration.
 */
int
generic_netmap_attach(struct ifnet *ifp)
{
	struct netmap_adapter *na;
	struct netmap_generic_adapter *gna;
	int retval;
	u_int num_tx_desc, num_rx_desc;

	num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */

	generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc);
	ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);

	gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (gna == NULL) {
		D("no memory on attach, give up");
		return ENOMEM;
	}
	na = (struct netmap_adapter *)gna;
	na->ifp = ifp;
	na->num_tx_desc = num_tx_desc;
	na->num_rx_desc = num_rx_desc;
	na->nm_register = &generic_netmap_register;
	na->nm_txsync = &generic_netmap_txsync;
	na->nm_rxsync = &generic_netmap_rxsync;
	na->nm_dtor = &generic_netmap_dtor;
	/* when using generic, IFCAP_NETMAP is set so we force
	 * NAF_SKIP_INTR to use the regular interrupt handler
	 */
	na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS;

	ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
			ifp->num_tx_queues, ifp->real_num_tx_queues,
			ifp->tx_queue_len);
	ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
			ifp->num_rx_queues, ifp->real_num_rx_queues);

	generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);

	retval = netmap_attach_common(na);
	if (retval) {
		free(gna, M_DEVBUF);
	}

	return retval;
}