netmap_kern.h revision 285349
1227614Sluigi/*
2260368Sluigi * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
3260368Sluigi * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
4241719Sluigi *
5227614Sluigi * Redistribution and use in source and binary forms, with or without
6227614Sluigi * modification, are permitted provided that the following conditions
7227614Sluigi * are met:
8228276Sluigi *   1. Redistributions of source code must retain the above copyright
9228276Sluigi *      notice, this list of conditions and the following disclaimer.
10228276Sluigi *   2. Redistributions in binary form must reproduce the above copyright
11228276Sluigi *      notice, this list of conditions and the following disclaimer in the
12227614Sluigi *    documentation and/or other materials provided with the distribution.
13241719Sluigi *
14227614Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15227614Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16227614Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17227614Sluigi * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18227614Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19227614Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20227614Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21227614Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22227614Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23227614Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24227614Sluigi * SUCH DAMAGE.
25227614Sluigi */
26227614Sluigi
27227614Sluigi/*
28227614Sluigi * $FreeBSD: head/sys/dev/netmap/netmap_kern.h 285349 2015-07-10 05:51:36Z luigi $
29227614Sluigi *
30227614Sluigi * The header contains the definitions of constants and function
31227614Sluigi * prototypes used only in kernelspace.
32227614Sluigi */
33227614Sluigi
34227614Sluigi#ifndef _NET_NETMAP_KERN_H_
35227614Sluigi#define _NET_NETMAP_KERN_H_
36227614Sluigi
/*
 * Compile-time selection of the optional netmap subsystems.
 *
 * On Linux each WITH_* switch follows the matching CONFIG_NETMAP_*
 * option; on every other platform VALE, pipes, monitors and the
 * generic adapter are unconditionally enabled (WITH_V1000 is only
 * ever defined on Linux).
 *
 * The unprefixed `linux' macro is a legacy predefine that compilers
 * drop in strict ISO modes (e.g. -std=c11), so __linux__ is accepted
 * as well.
 */
#if defined(linux) || defined(__linux__)

#if defined(CONFIG_NETMAP_VALE)
#define WITH_VALE
#endif
#if defined(CONFIG_NETMAP_PIPE)
#define WITH_PIPES
#endif
#if defined(CONFIG_NETMAP_MONITOR)
#define WITH_MONITOR
#endif
#if defined(CONFIG_NETMAP_GENERIC)
#define WITH_GENERIC
#endif
#if defined(CONFIG_NETMAP_V1000)
#define WITH_V1000
#endif

#else /* not linux */

#define WITH_VALE	// comment out to disable VALE support
#define WITH_PIPES
#define WITH_MONITOR
#define WITH_GENERIC

#endif /* linux */
63285349Sluigi
/*
 * Platform-specific glue.
 *
 * Each branch maps the OS primitives onto the names used by the
 * portable netmap code:
 *	NM_LOCK_T	low level lock protecting queues
 *	NM_MTX_*	sleepable lock type and operations (FreeBSD, Linux)
 *	NM_SELINFO_T	poll/select wait queue
 *	MBUF_LEN/IFP	packet length / interface of an mbuf or skb
 *	NM_SEND_UP	hand a packet to the host stack
 *	NM_ATOMIC_*	type and primitives used for busy flags
 */
#if defined(__FreeBSD__)

#define likely(x)	__builtin_expect((long)!!(x), 1L)
#define unlikely(x)	__builtin_expect((long)!!(x), 0L)

#define	NM_LOCK_T	struct mtx	/* low level spinlock, used to protect queues */

#define NM_MTX_T	struct sx	/* OS-specific mutex (sleepable) */
#define NM_MTX_INIT(m)		sx_init(&(m), #m)
#define NM_MTX_DESTROY(m)	sx_destroy(&(m))
#define NM_MTX_LOCK(m)		sx_xlock(&(m))
#define NM_MTX_UNLOCK(m)	sx_xunlock(&(m))
#define NM_MTX_ASSERT(m)	sx_assert(&(m), SA_XLOCKED)

#define	NM_SELINFO_T	struct nm_selinfo
#define	MBUF_LEN(m)	((m)->m_pkthdr.len)
#define	MBUF_IFP(m)	((m)->m_pkthdr.rcvif)
#define	NM_SEND_UP(ifp, m)	((NA(ifp))->if_input)(ifp, m)

#define NM_ATOMIC_T	volatile int	// XXX ?
/* atomic operations */
#include <machine/atomic.h>
/* evaluates to 0 when the flag was clear and is now set, non-zero otherwise */
#define NM_ATOMIC_TEST_AND_SET(p)       (!atomic_cmpset_acq_int((p), 0, 1))
#define NM_ATOMIC_CLEAR(p)              atomic_store_rel_int((p), 0)

/* location of the netmap adapter pointer inside the ifnet */
#if __FreeBSD_version >= 1100030
#define	WNA(_ifp)	(_ifp)->if_netmap
#else /* older FreeBSD */
#define	WNA(_ifp)	(_ifp)->if_pspare[0]
#endif /* older FreeBSD */

#if __FreeBSD_version >= 1100005
struct netmap_adapter *netmap_getna(if_t ifp);
#endif

/*
 * Access to the external-storage reference counter of an mbuf; the
 * field was renamed from ref_cnt to ext_cnt in newer kernels.
 * GET_MBUF_REFCNT yields -1 when the counter pointer is NULL.
 * SET_MBUF_REFCNT is fully parenthesized so that it behaves as a
 * single expression wherever it is expanded.
 */
#if __FreeBSD_version >= 1100027
#define GET_MBUF_REFCNT(m)      ((m)->m_ext.ext_cnt ? *((m)->m_ext.ext_cnt) : -1)
#define SET_MBUF_REFCNT(m, x)   (*((m)->m_ext.ext_cnt) = (x))
#define PNT_MBUF_REFCNT(m)      ((m)->m_ext.ext_cnt)
#else
#define GET_MBUF_REFCNT(m)      ((m)->m_ext.ref_cnt ? *((m)->m_ext.ref_cnt) : -1)
#define SET_MBUF_REFCNT(m, x)   (*((m)->m_ext.ref_cnt) = (x))
#define PNT_MBUF_REFCNT(m)      ((m)->m_ext.ref_cnt)
#endif

MALLOC_DECLARE(M_NETMAP);

/* wait queue plus the mutex that goes with it */
struct nm_selinfo {
	struct selinfo si;
	struct mtx m;
};

void freebsd_selwakeup(struct nm_selinfo *si, int pri);

// XXX linux struct, not used in FreeBSD
struct net_device_ops {
};
struct ethtool_ops {
};
struct hrtimer {
};
/* expand to nothing on FreeBSD */
#define NM_BNS_GET(b)
#define NM_BNS_PUT(b)

#elif defined(linux) || defined(__linux__)
/* `linux' disappears in strict ISO modes, hence the __linux__ alternative */

#define	NM_LOCK_T	safe_spinlock_t	// see bsd_glue.h
#define	NM_SELINFO_T	wait_queue_head_t
#define	MBUF_LEN(m)	((m)->len)
#define	MBUF_IFP(m)	((m)->dev)
/* the argument is parenthesized so that expressions such as *mp work */
#define	NM_SEND_UP(ifp, m)  \
                        do { \
                            (m)->priority = NM_MAGIC_PRIORITY_RX; \
                            netif_rx(m); \
                        } while (0)

#define NM_ATOMIC_T	volatile long unsigned int

#define NM_MTX_T	struct mutex	/* OS-specific sleepable lock */
#define NM_MTX_INIT(m)	mutex_init(&(m))
#define NM_MTX_DESTROY(m)	do { (void)(m); } while (0)
#define NM_MTX_LOCK(m)		mutex_lock(&(m))
#define NM_MTX_UNLOCK(m)	mutex_unlock(&(m))
/* NOTE(review): only evaluates the lock state, it does not trap */
#define NM_MTX_ASSERT(m)	mutex_is_locked(&(m))

#ifndef DEV_NETMAP
#define DEV_NETMAP
#endif /* DEV_NETMAP */

#elif defined (__APPLE__)

#warning apple support is incomplete.
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)
#define	NM_LOCK_T	IOLock *
#define	NM_SELINFO_T	struct selinfo
#define	MBUF_LEN(m)	((m)->m_pkthdr.len)
#define	NM_SEND_UP(ifp, m)	((ifp)->if_input)(ifp, m)

#else

#error unsupported platform

#endif /* end - platform-specific code */
168250052Sluigi
/*
 * Global netmap lock: thin wrappers that apply the platform NM_MTX_*
 * operations to the single netmap_global_lock object.
 */
#define	NMG_LOCK_T		NM_MTX_T
#define	NMG_LOCK_INIT()		NM_MTX_INIT(netmap_global_lock)
#define	NMG_LOCK_DESTROY()	NM_MTX_DESTROY(netmap_global_lock)
#define	NMG_LOCK()		NM_MTX_LOCK(netmap_global_lock)
#define	NMG_UNLOCK()		NM_MTX_UNLOCK(netmap_global_lock)
#define	NMG_LOCK_ASSERT()	NM_MTX_ASSERT(netmap_global_lock)
175285349Sluigi
/*
 * Debug printing helpers.
 *
 * ND() swallows its arguments: rename a D() to ND() to silence it.
 * D()  prints one line prefixed with a timestamp (seconds modulo 1000
 *      and microseconds), the source line and the calling function.
 */
#define ND(format, ...)
#define D(format, ...)						\
	do {							\
		struct timeval __xxts;				\
		microtime(&__xxts);				\
		printf("%03d.%06d [%4d] %-25s " format "\n",	\
		(int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec,	\
		__LINE__, __FUNCTION__, ##__VA_ARGS__);		\
	} while (0)

/*
 * Rate limited D(): lps indicates how many messages per second may be
 * printed. The counter is static, hence private to each expansion
 * site, and is reset whenever time_second changes.
 * lps is parenthesized so that any expression (e.g. a conditional)
 * can be passed as the limit.
 */
#define RD(lps, format, ...)					\
	do {							\
		static int t0, __cnt;				\
		if (t0 != time_second) {			\
			t0 = time_second;			\
			__cnt = 0;				\
		}						\
		if (__cnt++ < (lps))				\
			D(format, ##__VA_ARGS__);		\
	} while (0)
197241719Sluigi
198227614Sluigistruct netmap_adapter;
199251139Sluigistruct nm_bdg_fwd;
200251139Sluigistruct nm_bridge;
201251139Sluigistruct netmap_priv_d;
202227614Sluigi
203257529Sluigiconst char *nm_dump_buf(char *p, int len, int lim, char *dst);
204257529Sluigi
205259412Sluigi#include "netmap_mbq.h"
206259412Sluigi
207259412Sluigiextern NMG_LOCK_T	netmap_global_lock;
208259412Sluigi
/*
 * Direction of a ring. NR_TXRX counts the directions, so it can be
 * used as the size of arrays indexed by an enum txrx value.
 */
enum txrx { NR_RX = 0, NR_TX = 1, NR_TXRX };

/* printable name of a direction: "RX" for NR_RX, "TX" for anything else */
static __inline const char*
nm_txrx2str(enum txrx t)
{
	return (t == NR_RX ? "RX" : "TX");
}

/* opposite direction: NR_RX maps to NR_TX, anything else to NR_RX */
static __inline enum txrx
nm_txrx_swap(enum txrx t)
{
	return (t == NR_RX ? NR_TX : NR_RX);
}

/* loop header visiting every direction, starting from NR_RX */
#define for_rx_tx(t)	for ((t) = NR_RX; (t) < NR_TXRX; (t)++)
224285349Sluigi
225285349Sluigi
226227614Sluigi/*
227232238Sluigi * private, kernel view of a ring. Keeps track of the status of
228232238Sluigi * a ring across system calls.
229227614Sluigi *
230232238Sluigi *	nr_hwcur	index of the next buffer to refill.
231260368Sluigi *			It corresponds to ring->head
232260368Sluigi *			at the time the system call returns.
233232238Sluigi *
234260368Sluigi *	nr_hwtail	index of the first buffer owned by the kernel.
235260368Sluigi *			On RX, hwcur->hwtail are receive buffers
236260368Sluigi *			not yet released. hwcur is advanced following
237260368Sluigi *			ring->head, hwtail is advanced on incoming packets,
238260368Sluigi *			and a wakeup is generated when hwtail passes ring->cur
239260368Sluigi *			    On TX, hwcur->rcur have been filled by the sender
240260368Sluigi *			but not sent yet to the NIC; rcur->hwtail are available
241260368Sluigi *			for new transmissions, and hwtail->hwcur-1 are pending
242260368Sluigi *			transmissions not yet acknowledged.
243232238Sluigi *
244231594Sluigi * The indexes in the NIC and netmap rings are offset by nkr_hwofs slots.
245227614Sluigi * This is so that, on a reset, buffers owned by userspace are not
246227614Sluigi * modified by the kernel. In particular:
247260368Sluigi * RX rings: the next empty buffer (hwtail + hwofs) coincides with
248227614Sluigi * 	the next empty buffer as known by the hardware (next_to_check or so).
249227614Sluigi * TX rings: hwcur + hwofs coincides with next_to_send
250245579Sluigi *
251245579Sluigi * For received packets, slot->flags is set to nkr_slot_flags
252245579Sluigi * so we can provide a proper initial value (e.g. set NS_FORWARD
253245579Sluigi * when operating in 'transparent' mode).
254257529Sluigi *
255257529Sluigi * The following fields are used to implement lock-free copy of packets
256257529Sluigi * from input to output ports in VALE switch:
257257529Sluigi *	nkr_hwlease	buffer after the last one being copied.
258257529Sluigi *			A writer in nm_bdg_flush reserves N buffers
259257529Sluigi *			from nr_hwlease, advances it, then does the
260257529Sluigi *			copy outside the lock.
261257529Sluigi *			In RX rings (used for VALE ports),
262260368Sluigi *			nkr_hwtail <= nkr_hwlease < nkr_hwcur+N-1
263257529Sluigi *			In TX rings (used for NIC or host stack ports)
264260368Sluigi *			nkr_hwcur <= nkr_hwlease < nkr_hwtail
265257529Sluigi *	nkr_leases	array of nkr_num_slots where writers can report
266257529Sluigi *			completion of their block. NR_NOSLOT (~0) indicates
267257529Sluigi *			that the writer has not finished yet
268259412Sluigi *	nkr_lease_idx	index of next free slot in nr_leases, to be assigned
269257529Sluigi *
270257529Sluigi * The kring is manipulated by txsync/rxsync and generic netmap function.
271260368Sluigi *
272260368Sluigi * Concurrent rxsync or txsync on the same ring are prevented through
273267165Sluigi * by nm_kr_(try)lock() which in turn uses nr_busy. This is all we need
274260368Sluigi * for NIC rings, and for TX rings attached to the host stack.
275260368Sluigi *
276260368Sluigi * RX rings attached to the host stack use an mbq (rx_queue) on both
277260368Sluigi * rxsync_from_host() and netmap_transmit(). The mbq is protected
278260368Sluigi * by its internal lock.
279260368Sluigi *
280270063Sluigi * RX rings attached to the VALE switch are accessed by both senders
281260368Sluigi * and receiver. They are protected through the q_lock on the RX ring.
282227614Sluigi */
283227614Sluigistruct netmap_kring {
284260368Sluigi	struct netmap_ring	*ring;
285227614Sluigi
286260368Sluigi	uint32_t	nr_hwcur;
287260368Sluigi	uint32_t	nr_hwtail;
288260368Sluigi
289260368Sluigi	/*
290260368Sluigi	 * Copies of values in user rings, so we do not need to look
291260368Sluigi	 * at the ring (which could be modified). These are set in the
292260368Sluigi	 * *sync_prologue()/finalize() routines.
293260368Sluigi	 */
294260368Sluigi	uint32_t	rhead;
295260368Sluigi	uint32_t	rcur;
296260368Sluigi	uint32_t	rtail;
297260368Sluigi
298260368Sluigi	uint32_t	nr_kflags;	/* private driver flags */
299260368Sluigi#define NKR_PENDINTR	0x1		// Pending interrupt.
300285349Sluigi#define NKR_EXCLUSIVE	0x2		/* exclusive binding */
301260368Sluigi	uint32_t	nkr_num_slots;
302260368Sluigi
303260368Sluigi	/*
304260368Sluigi	 * On a NIC reset, the NIC ring indexes may be reset but the
305260368Sluigi	 * indexes in the netmap rings remain the same. nkr_hwofs
306260368Sluigi	 * keeps track of the offset between the two.
307260368Sluigi	 */
308260368Sluigi	int32_t		nkr_hwofs;
309260368Sluigi
310245579Sluigi	uint16_t	nkr_slot_flags;	/* initial value for flags */
311260368Sluigi
312260368Sluigi	/* last_reclaim is opaque marker to help reduce the frequency
313260368Sluigi	 * of operations such as reclaiming tx buffers. A possible use
314260368Sluigi	 * is set it to ticks and do the reclaim only once per tick.
315260368Sluigi	 */
316260368Sluigi	uint64_t	last_reclaim;
317260368Sluigi
318260368Sluigi
319260368Sluigi	NM_SELINFO_T	si;		/* poll/select wait queue */
320260368Sluigi	NM_LOCK_T	q_lock;		/* protects kring and ring. */
321260368Sluigi	NM_ATOMIC_T	nr_busy;	/* prevent concurrent syscalls */
322260368Sluigi
323231594Sluigi	struct netmap_adapter *na;
324260368Sluigi
325274355Sluigi	/* The following fields are for VALE switch support */
326251139Sluigi	struct nm_bdg_fwd *nkr_ft;
327260368Sluigi	uint32_t	*nkr_leases;
328260368Sluigi#define NR_NOSLOT	((uint32_t)~0)	/* used in nkr_*lease* */
329260368Sluigi	uint32_t	nkr_hwlease;
330260368Sluigi	uint32_t	nkr_lease_idx;
331257529Sluigi
332270063Sluigi	/* while nkr_stopped is set, no new [tr]xsync operations can
333270063Sluigi	 * be started on this kring.
334270063Sluigi	 * This is used by netmap_disable_all_rings()
335270063Sluigi	 * to find a synchronization point where critical data
336270063Sluigi	 * structures pointed to by the kring can be added or removed
337270063Sluigi	 */
338270063Sluigi	volatile int nkr_stopped;
339257529Sluigi
340261909Sluigi	/* Support for adapters without native netmap support.
341259412Sluigi	 * On tx rings we preallocate an array of tx buffers
342259412Sluigi	 * (same size as the netmap ring), on rx rings we
343261909Sluigi	 * store incoming mbufs in a queue that is drained by
344261909Sluigi	 * a rxsync.
345259412Sluigi	 */
346259412Sluigi	struct mbuf **tx_pool;
347260368Sluigi	// u_int nr_ntc;		/* Emulation of a next-to-clean RX ring pointer. */
348260368Sluigi	struct mbq rx_queue;            /* intercepted rx mbufs. */
349259412Sluigi
350285349Sluigi	uint32_t	users;		/* existing bindings for this ring */
351285349Sluigi
352260368Sluigi	uint32_t	ring_id;	/* debugging */
353285349Sluigi	enum txrx	tx;		/* kind of ring (tx or rx) */
354260368Sluigi	char name[64];			/* diagnostic */
355260368Sluigi
356270063Sluigi	/* [tx]sync callback for this kring.
357270063Sluigi	 * The default nm_kring_create callback (netmap_krings_create)
358270063Sluigi	 * sets the nm_sync callback of each hardware tx(rx) kring to
359270063Sluigi	 * the corresponding nm_txsync(nm_rxsync) taken from the
360270063Sluigi	 * netmap_adapter; moreover, it sets the sync callback
361270063Sluigi	 * of the host tx(rx) ring to netmap_txsync_to_host
362270063Sluigi	 * (netmap_rxsync_from_host).
363270063Sluigi	 *
364270063Sluigi	 * Overrides: the above configuration is not changed by
365270063Sluigi	 * any of the nm_krings_create callbacks.
366270063Sluigi	 */
367261909Sluigi	int (*nm_sync)(struct netmap_kring *kring, int flags);
368285349Sluigi	int (*nm_notify)(struct netmap_kring *kring, int flags);
369261909Sluigi
370261909Sluigi#ifdef WITH_PIPES
371270063Sluigi	struct netmap_kring *pipe;	/* if this is a pipe ring,
372270063Sluigi					 * pointer to the other end
373270063Sluigi					 */
374270063Sluigi	struct netmap_ring *save_ring;	/* pointer to hidden rings
375270063Sluigi       					 * (see netmap_pipe.c for details)
376270063Sluigi					 */
377261909Sluigi#endif /* WITH_PIPES */
378261909Sluigi
379285349Sluigi#ifdef WITH_VALE
380285349Sluigi	int (*save_notify)(struct netmap_kring *kring, int flags);
381285349Sluigi#endif
382285349Sluigi
383270063Sluigi#ifdef WITH_MONITOR
384285349Sluigi	/* array of krings that are monitoring this kring */
385285349Sluigi	struct netmap_kring **monitors;
386285349Sluigi	uint32_t max_monitors; /* current size of the monitors array */
387285349Sluigi	uint32_t n_monitors;	/* next unused entry in the monitor array */
388270063Sluigi	/*
389285349Sluigi	 * Monitors work by intercepting the sync and notify callbacks of the
390285349Sluigi	 * monitored krings. This is implemented by replacing the pointers
391285349Sluigi	 * above and saving the previous ones in mon_* pointers below
392270063Sluigi	 */
393285349Sluigi	int (*mon_sync)(struct netmap_kring *kring, int flags);
394285349Sluigi	int (*mon_notify)(struct netmap_kring *kring, int flags);
395285349Sluigi
396285349Sluigi	uint32_t mon_tail;  /* last seen slot on rx */
397285349Sluigi	uint32_t mon_pos;   /* index of this ring in the monitored ring array */
398270063Sluigi#endif
399230572Sluigi} __attribute__((__aligned__(64)));
400227614Sluigi
401257529Sluigi
/*
 * return the next index, with wraparound: lim is the last valid
 * index, so lim itself is followed by 0.
 */
static __inline uint32_t
nm_next(uint32_t i, uint32_t lim)
{
	return unlikely (i == lim) ? 0 : i + 1;
}
408257529Sluigi
409260368Sluigi
/*
 * return the previous index, with wraparound: lim is the last valid
 * index, so 0 is preceded by lim.
 */
static __inline uint32_t
nm_prev(uint32_t i, uint32_t lim)
{
	return unlikely (i == 0) ? lim : i - 1;
}
416260368Sluigi
417260368Sluigi
418227614Sluigi/*
419257529Sluigi *
420257529Sluigi * Here is the layout for the Rx and Tx rings.
421257529Sluigi
422257529Sluigi       RxRING                            TxRING
423257529Sluigi
424257529Sluigi      +-----------------+            +-----------------+
425257529Sluigi      |                 |            |                 |
426257529Sluigi      |XXX free slot XXX|            |XXX free slot XXX|
427257529Sluigi      +-----------------+            +-----------------+
428260368Sluigihead->| owned by user   |<-hwcur     | not sent to nic |<-hwcur
429260368Sluigi      |                 |            | yet             |
430260368Sluigi      +-----------------+            |                 |
431260368Sluigi cur->| available to    |            |                 |
432260368Sluigi      | user, not read  |            +-----------------+
433260368Sluigi      | yet             |       cur->| (being          |
434260368Sluigi      |                 |            |  prepared)      |
435260368Sluigi      |                 |            |                 |
436260368Sluigi      +-----------------+            +     ------      +
437260368Sluigitail->|                 |<-hwtail    |                 |<-hwlease
438260368Sluigi      | (being          | ...        |                 | ...
439260368Sluigi      |  prepared)      | ...        |                 | ...
440260368Sluigi      +-----------------+ ...        |                 | ...
441257529Sluigi      |                 |<-hwlease   +-----------------+
442260368Sluigi      |                 |      tail->|                 |<-hwtail
443257529Sluigi      |                 |            |                 |
444257529Sluigi      |                 |            |                 |
445257529Sluigi      |                 |            |                 |
446257529Sluigi      +-----------------+            +-----------------+
447257529Sluigi
448260368Sluigi * The cur/tail (user view) and hwcur/hwtail (kernel view)
449257529Sluigi * are used in the normal operation of the card.
450257529Sluigi *
451257529Sluigi * When a ring is the output of a switch port (Rx ring for
452257529Sluigi * a VALE port, Tx ring for the host stack or NIC), slots
453257529Sluigi * are reserved in blocks through 'hwlease' which points
454257529Sluigi * to the next unused slot.
455260368Sluigi * On an Rx ring, hwlease is always after hwtail,
456260368Sluigi * and completions cause hwtail to advance.
457260368Sluigi * On a Tx ring, hwlease is always between cur and hwtail,
458257529Sluigi * and completions cause cur to advance.
459257529Sluigi *
460257529Sluigi * nm_kr_space() returns the maximum number of slots that
461257529Sluigi * can be assigned.
462257529Sluigi * nm_kr_lease() reserves the required number of buffers,
463257529Sluigi *    advances nkr_hwlease and also returns an entry in
464257529Sluigi *    a circular array where completions should be reported.
465257529Sluigi */
466257529Sluigi
467257529Sluigi
/*
 * Lookup table descriptor: the table itself plus the number and the
 * size of the buffers it refers to.
 */
struct netmap_lut {
	struct lut_entry *lut;
	uint32_t objtotal;	/* max buffer index */
	uint32_t objsize;	/* buffer size */
};
473257529Sluigi
474270063Sluigistruct netmap_vp_adapter; // forward
475270063Sluigi
476257529Sluigi/*
477259412Sluigi * The "struct netmap_adapter" extends the "struct adapter"
478259412Sluigi * (or equivalent) device descriptor.
479259412Sluigi * It contains all base fields needed to support netmap operation.
480259412Sluigi * There are in fact different types of netmap adapters
481259412Sluigi * (native, generic, VALE switch...) so a netmap_adapter is
482259412Sluigi * just the first field in the derived type.
483227614Sluigi */
484227614Sluigistruct netmap_adapter {
485241719Sluigi	/*
486241719Sluigi	 * On linux we do not have a good way to tell if an interface
487259412Sluigi	 * is netmap-capable. So we always use the following trick:
488241719Sluigi	 * NA(ifp) points here, and the first entry (which hopefully
489241719Sluigi	 * always exists and is at least 32 bits) contains a magic
490241719Sluigi	 * value which we can use to detect that the interface is good.
491241719Sluigi	 */
492241719Sluigi	uint32_t magic;
493259412Sluigi	uint32_t na_flags;	/* enabled, and other flags */
494241719Sluigi#define NAF_SKIP_INTR	1	/* use the regular interrupt handler.
495241719Sluigi				 * useful during initialization
496241719Sluigi				 */
497251139Sluigi#define NAF_SW_ONLY	2	/* forward packets only to sw adapter */
498257529Sluigi#define NAF_BDG_MAYSLEEP 4	/* the bridge is allowed to sleep when
499257529Sluigi				 * forwarding packets coming from this
500257529Sluigi				 * interface
501257529Sluigi				 */
502285349Sluigi#define NAF_MEM_OWNER	8	/* the adapter uses its own memory area
503285349Sluigi				 * that cannot be changed
504257529Sluigi				 */
505285349Sluigi#define NAF_NATIVE      16      /* the adapter is native.
506270063Sluigi				 * Virtual ports (vale, pipe, monitor...)
507270063Sluigi				 * should never use this flag.
508259412Sluigi				 */
509259412Sluigi#define	NAF_NETMAP_ON	32	/* netmap is active (either native or
510270063Sluigi				 * emulated). Where possible (e.g. FreeBSD)
511259412Sluigi				 * IFCAP_NETMAP also mirrors this flag.
512259412Sluigi				 */
513261909Sluigi#define NAF_HOST_RINGS  64	/* the adapter supports the host rings */
514270063Sluigi#define NAF_FORCE_NATIVE 128	/* the adapter is always NATIVE */
515270063Sluigi#define	NAF_BUSY	(1U<<31) /* the adapter is used internally and
516270063Sluigi				  * cannot be registered from userspace
517270063Sluigi				  */
518259412Sluigi	int active_fds; /* number of user-space descriptors using this
519227614Sluigi			 interface, which is equal to the number of
520227614Sluigi			 struct netmap_if objs in the mapped region. */
521227614Sluigi
522239140Semaste	u_int num_rx_rings; /* number of adapter receive rings */
523239140Semaste	u_int num_tx_rings; /* number of adapter transmit rings */
524227614Sluigi
525285349Sluigi	u_int num_tx_desc;  /* number of descriptor in each queue */
526227614Sluigi	u_int num_rx_desc;
527227614Sluigi
528227614Sluigi	/* tx_rings and rx_rings are private but allocated
529227614Sluigi	 * as a contiguous chunk of memory. Each array has
530227614Sluigi	 * N+1 entries, for the adapter queues and for the host queue.
531227614Sluigi	 */
532227614Sluigi	struct netmap_kring *tx_rings; /* array of TX rings. */
533227614Sluigi	struct netmap_kring *rx_rings; /* array of RX rings. */
534260368Sluigi
535259412Sluigi	void *tailroom;		       /* space below the rings array */
536259412Sluigi				       /* (used for leases) */
537227614Sluigi
538259412Sluigi
539285349Sluigi	NM_SELINFO_T si[NR_TXRX];	/* global wait queues */
540232238Sluigi
541261909Sluigi	/* count users of the global wait queues */
542285349Sluigi	int si_users[NR_TXRX];
543261909Sluigi
544270063Sluigi	void *pdev; /* used to store pci device */
545270063Sluigi
546227614Sluigi	/* copy of if_qflush and if_transmit pointers, to intercept
547227614Sluigi	 * packets from the network stack when netmap is active.
548227614Sluigi	 */
549227614Sluigi	int     (*if_transmit)(struct ifnet *, struct mbuf *);
550227614Sluigi
551260368Sluigi	/* copy of if_input for netmap_send_up() */
552260368Sluigi	void     (*if_input)(struct ifnet *, struct mbuf *);
553260368Sluigi
554227614Sluigi	/* references to the ifnet and device routines, used by
555227614Sluigi	 * the generic netmap functions.
556227614Sluigi	 */
557227614Sluigi	struct ifnet *ifp; /* adapter is ifp->if_softc */
558227614Sluigi
559260368Sluigi	/*---- callbacks for this netmap adapter -----*/
560260368Sluigi	/*
561260368Sluigi	 * nm_dtor() is the cleanup routine called when destroying
562260368Sluigi	 *	the adapter.
563267165Sluigi	 *	Called with NMG_LOCK held.
564260368Sluigi	 *
565260368Sluigi	 * nm_register() is called on NIOCREGIF and close() to enter
566260368Sluigi	 *	or exit netmap mode on the NIC
567270063Sluigi	 *	Called with NNG_LOCK held.
568260368Sluigi	 *
569260368Sluigi	 * nm_txsync() pushes packets to the underlying hw/switch
570260368Sluigi	 *
571260368Sluigi	 * nm_rxsync() collects packets from the underlying hw/switch
572260368Sluigi	 *
573260368Sluigi	 * nm_config() returns configuration information from the OS
574267165Sluigi	 *	Called with NMG_LOCK held.
575260368Sluigi	 *
576270063Sluigi	 * nm_krings_create() create and init the tx_rings and
577270063Sluigi	 * 	rx_rings arrays of kring structures. In particular,
578270063Sluigi	 * 	set the nm_sync callbacks for each ring.
579270063Sluigi	 * 	There is no need to also allocate the corresponding
580270063Sluigi	 * 	netmap_rings, since netmap_mem_rings_create() will always
581270063Sluigi	 * 	be called to provide the missing ones.
582270063Sluigi	 *	Called with NNG_LOCK held.
583260368Sluigi	 *
584270063Sluigi	 * nm_krings_delete() cleanup and delete the tx_rings and rx_rings
585270063Sluigi	 * 	arrays
586270063Sluigi	 *	Called with NMG_LOCK held.
587260368Sluigi	 *
588267165Sluigi	 * nm_notify() is used to act after data have become available
589270063Sluigi	 * 	(or the stopped state of the ring has changed)
590260368Sluigi	 *	For hw devices this is typically a selwakeup(),
591260368Sluigi	 *	but for NIC/host ports attached to a switch (or vice-versa)
592260368Sluigi	 *	we also need to invoke the 'txsync' code downstream.
593260368Sluigi	 */
594259412Sluigi	void (*nm_dtor)(struct netmap_adapter *);
595231594Sluigi
596259412Sluigi	int (*nm_register)(struct netmap_adapter *, int onoff);
597257529Sluigi
598270063Sluigi	int (*nm_txsync)(struct netmap_kring *kring, int flags);
599270063Sluigi	int (*nm_rxsync)(struct netmap_kring *kring, int flags);
600285349Sluigi	int (*nm_notify)(struct netmap_kring *kring, int flags);
601257529Sluigi#define NAF_FORCE_READ    1
602257529Sluigi#define NAF_FORCE_RECLAIM 2
603245835Sluigi	/* return configuration information */
604259412Sluigi	int (*nm_config)(struct netmap_adapter *,
605259412Sluigi		u_int *txr, u_int *txd, u_int *rxr, u_int *rxd);
606259412Sluigi	int (*nm_krings_create)(struct netmap_adapter *);
607259412Sluigi	void (*nm_krings_delete)(struct netmap_adapter *);
608270063Sluigi#ifdef WITH_VALE
609270063Sluigi	/*
610270063Sluigi	 * nm_bdg_attach() initializes the na_vp field to point
611270063Sluigi	 *      to an adapter that can be attached to a VALE switch. If the
612270063Sluigi	 *      current adapter is already a VALE port, na_vp is simply a cast;
613270063Sluigi	 *      otherwise, na_vp points to a netmap_bwrap_adapter.
614270063Sluigi	 *      If applicable, this callback also initializes na_hostvp,
615270063Sluigi	 *      that can be used to connect the adapter host rings to the
616270063Sluigi	 *      switch.
617270063Sluigi	 *      Called with NMG_LOCK held.
618270063Sluigi	 *
619270063Sluigi	 * nm_bdg_ctl() is called on the actual attach/detach to/from
620270063Sluigi	 *      to/from the switch, to perform adapter-specific
621270063Sluigi	 *      initializations
622270063Sluigi	 *      Called with NMG_LOCK held.
623270063Sluigi	 */
624270063Sluigi	int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *);
625270063Sluigi	int (*nm_bdg_ctl)(struct netmap_adapter *, struct nmreq *, int);
626270063Sluigi
627270063Sluigi	/* adapter used to attach this adapter to a VALE switch (if any) */
628270063Sluigi	struct netmap_vp_adapter *na_vp;
629270063Sluigi	/* adapter used to attach the host rings of this adapter
630270063Sluigi	 * to a VALE switch (if any) */
631270063Sluigi	struct netmap_vp_adapter *na_hostvp;
632270063Sluigi#endif
633270063Sluigi
634259412Sluigi	/* standard refcount to control the lifetime of the adapter
635259412Sluigi	 * (it should be equal to the lifetime of the corresponding ifp)
636259412Sluigi	 */
637259412Sluigi	int na_refcount;
638259412Sluigi
639259412Sluigi	/* memory allocator (opaque)
640259412Sluigi	 * We also cache a pointer to the lut_entry for translating
641259412Sluigi	 * buffer addresses, and the total number of buffers.
642259412Sluigi	 */
643259412Sluigi 	struct netmap_mem_d *nm_mem;
644285349Sluigi	struct netmap_lut na_lut;
645259412Sluigi
646270063Sluigi	/* additional information attached to this adapter
647270063Sluigi	 * by other netmap subsystems. Currently used by
648270063Sluigi	 * bwrap and LINUX/v1000.
649259412Sluigi	 */
650259412Sluigi	void *na_private;
651261909Sluigi
652270063Sluigi	/* array of pipes that have this adapter as a parent */
653261909Sluigi	struct netmap_pipe_adapter **na_pipes;
654270063Sluigi	int na_next_pipe;	/* next free slot in the array */
655270063Sluigi	int na_max_pipes;	/* size of the array */
656270063Sluigi
657270063Sluigi	char name[64];
658259412Sluigi};
659259412Sluigi
660285349Sluigistatic __inline u_int
661285349Sluiginma_get_ndesc(struct netmap_adapter *na, enum txrx t)
662285349Sluigi{
663285349Sluigi	return (t == NR_TX ? na->num_tx_desc : na->num_rx_desc);
664285349Sluigi}
665260368Sluigi
666285349Sluigistatic __inline void
667285349Sluiginma_set_ndesc(struct netmap_adapter *na, enum txrx t, u_int v)
668285349Sluigi{
669285349Sluigi	if (t == NR_TX)
670285349Sluigi		na->num_tx_desc = v;
671285349Sluigi	else
672285349Sluigi		na->num_rx_desc = v;
673285349Sluigi}
674285349Sluigi
675285349Sluigistatic __inline u_int
676285349Sluiginma_get_nrings(struct netmap_adapter *na, enum txrx t)
677285349Sluigi{
678285349Sluigi	return (t == NR_TX ? na->num_tx_rings : na->num_rx_rings);
679285349Sluigi}
680285349Sluigi
681285349Sluigistatic __inline void
682285349Sluiginma_set_nrings(struct netmap_adapter *na, enum txrx t, u_int v)
683285349Sluigi{
684285349Sluigi	if (t == NR_TX)
685285349Sluigi		na->num_tx_rings = v;
686285349Sluigi	else
687285349Sluigi		na->num_rx_rings = v;
688285349Sluigi}
689285349Sluigi
690285349Sluigistatic __inline struct netmap_kring*
691285349SluigiNMR(struct netmap_adapter *na, enum txrx t)
692285349Sluigi{
693285349Sluigi	return (t == NR_TX ? na->tx_rings : na->rx_rings);
694285349Sluigi}
695285349Sluigi
696259412Sluigi/*
697259412Sluigi * If the NIC is owned by the kernel
698259412Sluigi * (i.e., bridge), neither another bridge nor user can use it;
699259412Sluigi * if the NIC is owned by a user, only users can share it.
700259412Sluigi * Evaluation must be done under NMG_LOCK().
701259412Sluigi */
702270063Sluigi#define NETMAP_OWNED_BY_KERN(na)	((na)->na_flags & NAF_BUSY)
703259412Sluigi#define NETMAP_OWNED_BY_ANY(na) \
704270063Sluigi	(NETMAP_OWNED_BY_KERN(na) || ((na)->active_fds > 0))
705259412Sluigi
706259412Sluigi/*
707259412Sluigi * derived netmap adapters for various types of ports
708259412Sluigi */
709259412Sluigistruct netmap_vp_adapter {	/* VALE software port */
710259412Sluigi	struct netmap_adapter up;
711259412Sluigi
712250107Sluigi	/*
713250107Sluigi	 * Bridge support:
714250107Sluigi	 *
715250107Sluigi	 * bdg_port is the port number used in the bridge;
716251139Sluigi	 * na_bdg points to the bridge this NA is attached to.
717250107Sluigi	 */
718238812Sluigi	int bdg_port;
719251139Sluigi	struct nm_bridge *na_bdg;
720259412Sluigi	int retry;
721259412Sluigi
722261909Sluigi	/* Offset of ethernet header for each packet. */
723261909Sluigi	u_int virt_hdr_len;
724261909Sluigi	/* Maximum Frame Size, used in bdg_mismatch_datapath() */
725261909Sluigi	u_int mfs;
726285349Sluigi	/* Last source MAC on this port */
727285349Sluigi	uint64_t last_smac;
728259412Sluigi};
729259412Sluigi
730260368Sluigi
731259412Sluigistruct netmap_hw_adapter {	/* physical device */
732259412Sluigi	struct netmap_adapter up;
733259412Sluigi
734259412Sluigi	struct net_device_ops nm_ndo;	// XXX linux only
735270063Sluigi	struct ethtool_ops    nm_eto;	// XXX linux only
736270063Sluigi	const struct ethtool_ops*   save_ethtool;
737270063Sluigi
738270063Sluigi	int (*nm_hw_register)(struct netmap_adapter *, int onoff);
739259412Sluigi};
740259412Sluigi
741274362Sluigi#ifdef WITH_GENERIC
742261909Sluigi/* Mitigation support. */
743261909Sluigistruct nm_generic_mit {
744261909Sluigi	struct hrtimer mit_timer;
745261909Sluigi	int mit_pending;
746270063Sluigi	int mit_ring_idx;  /* index of the ring being mitigated */
747261909Sluigi	struct netmap_adapter *mit_na;  /* backpointer */
748261909Sluigi};
749260368Sluigi
750260368Sluigistruct netmap_generic_adapter {	/* emulated device */
751259412Sluigi	struct netmap_hw_adapter up;
752259412Sluigi
753259412Sluigi	/* Pointer to a previously used netmap adapter. */
754259412Sluigi	struct netmap_adapter *prev;
755259412Sluigi
756259412Sluigi	/* generic netmap adapters support:
757259412Sluigi	 * a net_device_ops struct overrides ndo_select_queue(),
758259412Sluigi	 * save_if_input saves the if_input hook (FreeBSD),
759261909Sluigi	 * mit implements rx interrupt mitigation,
760259412Sluigi	 */
761259412Sluigi	struct net_device_ops generic_ndo;
762259412Sluigi	void (*save_if_input)(struct ifnet *, struct mbuf *);
763259412Sluigi
764261909Sluigi	struct nm_generic_mit *mit;
765260368Sluigi#ifdef linux
766260368Sluigi        netdev_tx_t (*save_start_xmit)(struct mbuf *, struct ifnet *);
767260368Sluigi#endif
768259412Sluigi};
769274362Sluigi#endif  /* WITH_GENERIC */
770259412Sluigi
771261909Sluigistatic __inline int
772285349Sluiginetmap_real_rings(struct netmap_adapter *na, enum txrx t)
773261909Sluigi{
774285349Sluigi	return nma_get_nrings(na, t) + !!(na->na_flags & NAF_HOST_RINGS);
775261909Sluigi}
776261909Sluigi
777259412Sluigi#ifdef WITH_VALE
778259412Sluigi
779260368Sluigi/*
780260368Sluigi * Bridge wrapper for non VALE ports attached to a VALE switch.
781259412Sluigi *
782260368Sluigi * The real device must already have its own netmap adapter (hwna).
783260368Sluigi * The bridge wrapper and the hwna adapter share the same set of
784260368Sluigi * netmap rings and buffers, but they have two separate sets of
785260368Sluigi * krings descriptors, with tx/rx meanings swapped:
786259412Sluigi *
787259412Sluigi *                                  netmap
788259412Sluigi *           bwrap     krings       rings      krings      hwna
789259412Sluigi *         +------+   +------+     +-----+    +------+   +------+
790259412Sluigi *         |tx_rings->|      |\   /|     |----|      |<-tx_rings|
791259412Sluigi *         |      |   +------+ \ / +-----+    +------+   |      |
792259412Sluigi *         |      |             X                        |      |
793259412Sluigi *         |      |            / \                       |      |
794259412Sluigi *         |      |   +------+/   \+-----+    +------+   |      |
795259412Sluigi *         |rx_rings->|      |     |     |----|      |<-rx_rings|
796259412Sluigi *         |      |   +------+     +-----+    +------+   |      |
797259412Sluigi *         +------+                                      +------+
798259412Sluigi *
799260368Sluigi * - packets coming from the bridge go to the bwrap rx rings,
800260368Sluigi *   which are also the hwna tx rings.  The bwrap notify callback
801260368Sluigi *   will then complete the hwna tx (see netmap_bwrap_notify).
802259412Sluigi *
803260368Sluigi * - packets coming from the outside go to the hwna rx rings,
804260368Sluigi *   which are also the bwrap tx rings.  The (overwritten) hwna
805260368Sluigi *   notify method will then complete the bridge tx
806260368Sluigi *   (see netmap_bwrap_intr_notify).
807259412Sluigi *
808260368Sluigi *   The bridge wrapper may optionally connect the hwna 'host' rings
809260368Sluigi *   to the bridge. This is done by using a second port in the
810260368Sluigi *   bridge and connecting it to the 'host' netmap_vp_adapter
811260368Sluigi *   contained in the netmap_bwrap_adapter. The bwrap host adapter
812260368Sluigi *   cross-links the hwna host rings in the same way as shown above.
813259412Sluigi *
814260368Sluigi * - packets coming from the bridge and directed to the host stack
815260368Sluigi *   are handled by the bwrap host notify callback
816260368Sluigi *   (see netmap_bwrap_host_notify)
817260368Sluigi *
818260368Sluigi * - packets coming from the host stack are still handled by the
819260368Sluigi *   overwritten hwna notify callback (netmap_bwrap_intr_notify),
820260368Sluigi *   but are diverted to the host adapter depending on the ring number.
821260368Sluigi *
822259412Sluigi */
823259412Sluigistruct netmap_bwrap_adapter {
824259412Sluigi	struct netmap_vp_adapter up;
825259412Sluigi	struct netmap_vp_adapter host;  /* for host rings */
826259412Sluigi	struct netmap_adapter *hwna;	/* the underlying device */
827259412Sluigi
828270063Sluigi	/* backup of the hwna memory allocator */
829270063Sluigi	struct netmap_mem_d *save_nmd;
830260368Sluigi
831260368Sluigi	/*
832260368Sluigi	 * When we attach a physical interface to the bridge, we
833251139Sluigi	 * allow the controlling process to terminate, so we need
834270063Sluigi	 * a place to store the n_detmap_priv_d data structure.
835260368Sluigi	 * This is only done when physical interfaces
836260368Sluigi	 * are attached to a bridge.
837251139Sluigi	 */
838251139Sluigi	struct netmap_priv_d *na_kpriv;
839227614Sluigi};
840270063Sluigiint netmap_bwrap_attach(const char *name, struct netmap_adapter *);
841227614Sluigi
842259412Sluigi
843260368Sluigi#endif /* WITH_VALE */
844257529Sluigi
845261909Sluigi#ifdef WITH_PIPES
846257529Sluigi
847261909Sluigi#define NM_MAXPIPES 	64	/* max number of pipes per adapter */
848261909Sluigi
849261909Sluigistruct netmap_pipe_adapter {
850261909Sluigi	struct netmap_adapter up;
851261909Sluigi
852261909Sluigi	u_int id; 	/* pipe identifier */
853261909Sluigi	int role;	/* either NR_REG_PIPE_MASTER or NR_REG_PIPE_SLAVE */
854261909Sluigi
855261909Sluigi	struct netmap_adapter *parent; /* adapter that owns the memory */
856261909Sluigi	struct netmap_pipe_adapter *peer; /* the other end of the pipe */
857261909Sluigi	int peer_ref;		/* 1 iff we are holding a ref to the peer */
858261909Sluigi
859261909Sluigi	u_int parent_slot; /* index in the parent pipe array */
860261909Sluigi};
861261909Sluigi
862261909Sluigi#endif /* WITH_PIPES */
863261909Sluigi
864261909Sluigi
865260368Sluigi/* return slots reserved to rx clients; used in drivers */
866257529Sluigistatic inline uint32_t
867260368Sluiginm_kr_rxspace(struct netmap_kring *k)
868257529Sluigi{
869260368Sluigi	int space = k->nr_hwtail - k->nr_hwcur;
870267128Sluigi	if (space < 0)
871260368Sluigi		space += k->nkr_num_slots;
872260368Sluigi	ND("preserving %d rx slots %d -> %d", space, k->nr_hwcur, k->nr_hwtail);
873257529Sluigi
874260368Sluigi	return space;
875257529Sluigi}
876257529Sluigi
877257529Sluigi
878260368Sluigi/* True if no space in the tx ring. only valid after txsync_prologue */
879260368Sluigistatic inline int
880260368Sluiginm_kr_txempty(struct netmap_kring *kring)
881259412Sluigi{
882260368Sluigi	return kring->rcur == kring->nr_hwtail;
883259412Sluigi}
884259412Sluigi
885259412Sluigi
886257529Sluigi/*
887259412Sluigi * protect against multiple threads using the same ring.
888259412Sluigi * also check that the ring has not been stopped.
889259412Sluigi * We only care for 0 or !=0 as a return code.
890227614Sluigi */
891259412Sluigi#define NM_KR_BUSY	1
892259412Sluigi#define NM_KR_STOPPED	2
893227614Sluigi
894260368Sluigi
895259412Sluigistatic __inline void nm_kr_put(struct netmap_kring *kr)
896259412Sluigi{
897259412Sluigi	NM_ATOMIC_CLEAR(&kr->nr_busy);
898259412Sluigi}
899227614Sluigi
900260368Sluigi
901259412Sluigistatic __inline int nm_kr_tryget(struct netmap_kring *kr)
902259412Sluigi{
903259412Sluigi	/* check a first time without taking the lock
904259412Sluigi	 * to avoid starvation for nm_kr_get()
905259412Sluigi	 */
906259412Sluigi	if (unlikely(kr->nkr_stopped)) {
907259412Sluigi		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
908259412Sluigi		return NM_KR_STOPPED;
909259412Sluigi	}
910259412Sluigi	if (unlikely(NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)))
911259412Sluigi		return NM_KR_BUSY;
912259412Sluigi	/* check a second time with lock held */
913259412Sluigi	if (unlikely(kr->nkr_stopped)) {
914259412Sluigi		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
915259412Sluigi		nm_kr_put(kr);
916259412Sluigi		return NM_KR_STOPPED;
917259412Sluigi	}
918259412Sluigi	return 0;
919259412Sluigi}
920227614Sluigi
921285349Sluigistatic __inline void nm_kr_get(struct netmap_kring *kr)
922285349Sluigi{
923285349Sluigi	while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
924285349Sluigi		tsleep(kr, 0, "NM_KR_GET", 4);
925285349Sluigi}
926259412Sluigi
927285349Sluigi
928285349Sluigi
929285349Sluigi
930227614Sluigi/*
931260368Sluigi * The following functions are used by individual drivers to
932227614Sluigi * support netmap operation.
933227614Sluigi *
934227614Sluigi * netmap_attach() initializes a struct netmap_adapter, allocating the
935227614Sluigi * 	struct netmap_ring's and the struct selinfo.
936227614Sluigi *
937227614Sluigi * netmap_detach() frees the memory allocated by netmap_attach().
938227614Sluigi *
939257529Sluigi * netmap_transmit() replaces the if_transmit routine of the interface,
940227614Sluigi *	and is used to intercept packets coming from the stack.
941227614Sluigi *
942227614Sluigi * netmap_load_map/netmap_reload_map are helper routines to set/reset
943227614Sluigi *	the dmamap for a packet buffer
944227614Sluigi *
945270063Sluigi * netmap_reset() is a helper routine to be called in the hw driver
946270063Sluigi *	when reinitializing a ring. It should not be called by
947270063Sluigi *	virtual ports (vale, pipes, monitor)
948227614Sluigi */
949259412Sluigiint netmap_attach(struct netmap_adapter *);
950227614Sluigivoid netmap_detach(struct ifnet *);
951257529Sluigiint netmap_transmit(struct ifnet *, struct mbuf *);
952227614Sluigistruct netmap_slot *netmap_reset(struct netmap_adapter *na,
953257529Sluigi	enum txrx tx, u_int n, u_int new_cur);
954227614Sluigiint netmap_ring_reinit(struct netmap_kring *);
955227614Sluigi
956260368Sluigi/* default functions to handle rx/tx interrupts */
957260368Sluigiint netmap_rx_irq(struct ifnet *, u_int, u_int *);
958260368Sluigi#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
959260368Sluigivoid netmap_common_irq(struct ifnet *, u_int, u_int *work_done);
960260368Sluigi
961260368Sluigi
/* functions used by external modules to interface with VALE */
#ifndef WITH_VALE
/* VALE is compiled out: every accessor yields a fixed "absent" value */
#define netmap_vp_to_ifp(_vp)		NULL
#define netmap_ifp_to_vp(_ifp)		NULL
#define netmap_ifp_to_host_vp(_ifp)	NULL
#define netmap_bdg_idx(_vp)		(-1)
#define netmap_bdg_name(_vp)		NULL
#else /* WITH_VALE */
#define netmap_vp_to_ifp(_vp)		((_vp)->up.ifp)
#define netmap_ifp_to_vp(_ifp)		(NA(_ifp)->na_vp)
#define netmap_ifp_to_host_vp(_ifp)	(NA(_ifp)->na_hostvp)
#define netmap_bdg_idx(_vp)		((_vp)->bdg_port)
const char *netmap_bdg_name(struct netmap_vp_adapter *);
#endif /* WITH_VALE */
976260368Sluigi
977270063Sluigistatic inline int
978285349Sluiginm_netmap_on(struct netmap_adapter *na)
979270063Sluigi{
980285349Sluigi	return na && na->na_flags & NAF_NETMAP_ON;
981270063Sluigi}
982270063Sluigi
983270063Sluigistatic inline int
984285349Sluiginm_native_on(struct netmap_adapter *na)
985270063Sluigi{
986285349Sluigi	return nm_netmap_on(na) && (na->na_flags & NAF_NATIVE);
987270063Sluigi}
988270063Sluigi
989260368Sluigi/* set/clear native flags and if_transmit/netdev_ops */
990259412Sluigistatic inline void
991259412Sluiginm_set_native_flags(struct netmap_adapter *na)
992259412Sluigi{
993259412Sluigi	struct ifnet *ifp = na->ifp;
994259412Sluigi
995285349Sluigi	na->na_flags |= NAF_NETMAP_ON;
996259412Sluigi#ifdef IFCAP_NETMAP /* or FreeBSD ? */
997259412Sluigi	ifp->if_capenable |= IFCAP_NETMAP;
998259412Sluigi#endif
999259412Sluigi#ifdef __FreeBSD__
1000259412Sluigi	na->if_transmit = ifp->if_transmit;
1001259412Sluigi	ifp->if_transmit = netmap_transmit;
1002259412Sluigi#else
1003259412Sluigi	na->if_transmit = (void *)ifp->netdev_ops;
1004259412Sluigi	ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo;
1005270063Sluigi	((struct netmap_hw_adapter *)na)->save_ethtool = ifp->ethtool_ops;
1006270063Sluigi	ifp->ethtool_ops = &((struct netmap_hw_adapter*)na)->nm_eto;
1007259412Sluigi#endif
1008259412Sluigi}
1009259412Sluigi
1010260368Sluigi
1011259412Sluigistatic inline void
1012259412Sluiginm_clear_native_flags(struct netmap_adapter *na)
1013259412Sluigi{
1014259412Sluigi	struct ifnet *ifp = na->ifp;
1015259412Sluigi
1016259412Sluigi#ifdef __FreeBSD__
1017259412Sluigi	ifp->if_transmit = na->if_transmit;
1018259412Sluigi#else
1019259412Sluigi	ifp->netdev_ops = (void *)na->if_transmit;
1020270063Sluigi	ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool;
1021259412Sluigi#endif
1022285349Sluigi	na->na_flags &= ~NAF_NETMAP_ON;
1023259412Sluigi#ifdef IFCAP_NETMAP /* or FreeBSD ? */
1024259412Sluigi	ifp->if_capenable &= ~IFCAP_NETMAP;
1025259412Sluigi#endif
1026259412Sluigi}
1027259412Sluigi
1028260368Sluigi
/*
 * check/fix address and len in tx rings
 *
 * NM_CHECK_ADDR_LEN(_na, _a, _l) clamps the lvalue _l to
 * NETMAP_BUF_SIZE(_na). The debug version additionally reports, through
 * RD(), the case where _a equals NETMAP_BUF_BASE(_na) or _l exceeds the
 * buffer size; that message reads `kring', `nm_i' and `slot' from the
 * scope of the caller, so they must be visible at the call site.
 * The length printed is the macro argument itself, so the caller is free
 * to name its length variable anything.
 * _a and _l are evaluated more than once: pass side-effect-free
 * expressions.
 */
#if 1 /* debug version */
#define	NM_CHECK_ADDR_LEN(_na, _a, _l)	do {				\
	if ((_a) == NETMAP_BUF_BASE(_na) ||				\
	    (_l) > NETMAP_BUF_SIZE(_na)) {				\
		RD(5, "bad addr/len ring %d slot %d idx %d len %d",	\
			kring->ring_id, nm_i, slot->buf_idx, (_l));	\
		if ((_l) > NETMAP_BUF_SIZE(_na))			\
			(_l) = NETMAP_BUF_SIZE(_na);			\
	} } while (0)
#else /* no debug version */
#define	NM_CHECK_ADDR_LEN(_na, _a, _l)	do {				\
		if ((_l) > NETMAP_BUF_SIZE(_na))			\
			(_l) = NETMAP_BUF_SIZE(_na);			\
	} while (0)
#endif
1044259412Sluigi
1045259412Sluigi
1046259412Sluigi/*---------------------------------------------------------------*/
1047259412Sluigi/*
1048270063Sluigi * Support routines used by netmap subsystems
1049270063Sluigi * (native drivers, VALE, generic, pipes, monitors, ...)
1050259412Sluigi */
1051270063Sluigi
1052270063Sluigi
1053270063Sluigi/* common routine for all functions that create a netmap adapter. It performs
1054270063Sluigi * two main tasks:
1055270063Sluigi * - if the na points to an ifp, mark the ifp as netmap capable
1056270063Sluigi *   using na as its native adapter;
1057270063Sluigi * - provide defaults for the setup callbacks and the memory allocator
1058270063Sluigi */
1059270063Sluigiint netmap_attach_common(struct netmap_adapter *);
1060270063Sluigi/* common actions to be performed on netmap adapter destruction */
1061270063Sluigivoid netmap_detach_common(struct netmap_adapter *);
1062270063Sluigi/* fill priv->np_[tr]xq{first,last} using the ringid and flags information
1063270063Sluigi * coming from a struct nmreq
1064270063Sluigi */
1065270063Sluigiint netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags);
1066270063Sluigi/* update the ring parameters (number and size of tx and rx rings).
1067270063Sluigi * It calls the nm_config callback, if available.
1068270063Sluigi */
1069259412Sluigiint netmap_update_config(struct netmap_adapter *na);
1070270063Sluigi/* create and initialize the common fields of the krings array.
1071270063Sluigi * using the information that must be already available in the na.
1072270063Sluigi * tailroom can be used to request the allocation of additional
1073270063Sluigi * tailroom bytes after the krings array. This is used by
1074270063Sluigi * netmap_vp_adapter's (i.e., VALE ports) to make room for
1075270063Sluigi * leasing-related data structures
1076270063Sluigi */
1077261909Sluigiint netmap_krings_create(struct netmap_adapter *na, u_int tailroom);
1078270063Sluigi/* deletes the kring array of the adapter. The array must have
1079270063Sluigi * been created using netmap_krings_create
1080270063Sluigi */
1081259412Sluigivoid netmap_krings_delete(struct netmap_adapter *na);
1082285349Sluigiint netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait);
1083270063Sluigi
1084285349Sluigi
1085270063Sluigi/* set the stopped/enabled status of ring
1086270063Sluigi * When stopping, they also wait for all current activity on the ring to
1087270063Sluigi * terminate. The status change is then notified using the na nm_notify
1088270063Sluigi * callback.
1089270063Sluigi */
1090285349Sluigivoid netmap_set_ring(struct netmap_adapter *, u_int ring_id, enum txrx, int stopped);
1091270063Sluigi/* set the stopped/enabled status of all rings of the adapter. */
1092270063Sluigivoid netmap_set_all_rings(struct netmap_adapter *, int stopped);
1093270063Sluigi/* convenience wrappers for netmap_set_all_rings, used in drivers */
1094270063Sluigivoid netmap_disable_all_rings(struct ifnet *);
1095270063Sluigivoid netmap_enable_all_rings(struct ifnet *);
1096270063Sluigi
1097260368Sluigiint netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait);
1098259412Sluigi
1099285349Sluigiint
1100259412Sluiginetmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
1101285349Sluigi	uint16_t ringid, uint32_t flags);
1102259412Sluigi
1103259412Sluigi
1104259412Sluigi
1105257529Sluigiu_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
1106259412Sluigiint netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
1107259412Sluigiint netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na);
1108257529Sluigi
1109260368Sluigi
1110259412Sluigi#ifdef WITH_VALE
1111251139Sluigi/*
1112260368Sluigi * The following bridge-related functions are used by other
1113260368Sluigi * kernel modules.
1114260368Sluigi *
1115260368Sluigi * VALE only supports unicast or broadcast. The lookup
1116251139Sluigi * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
1117251139Sluigi * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown.
1118251139Sluigi * XXX in practice "unknown" might be handled same as broadcast.
1119251139Sluigi */
1120270063Sluigitypedef u_int (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
1121285349Sluigi		struct netmap_vp_adapter *);
1122270063Sluigitypedef int (*bdg_config_fn_t)(struct nm_ifreq *);
1123270063Sluigitypedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
1124270063Sluigistruct netmap_bdg_ops {
1125270063Sluigi	bdg_lookup_fn_t lookup;
1126270063Sluigi	bdg_config_fn_t config;
1127270063Sluigi	bdg_dtor_fn_t	dtor;
1128270063Sluigi};
1129259412Sluigi
1130270063Sluigiu_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
1131285349Sluigi		struct netmap_vp_adapter *);
1132270063Sluigi
1133259412Sluigi#define	NM_BDG_MAXPORTS		254	/* up to 254 */
1134251139Sluigi#define	NM_BDG_BROADCAST	NM_BDG_MAXPORTS
1135251139Sluigi#define	NM_BDG_NOPORT		(NM_BDG_MAXPORTS+1)
1136251139Sluigi
1137259412Sluigi#define	NM_NAME			"vale"	/* prefix for bridge port name */
1138259412Sluigi
1139259412Sluigi/* these are redefined in case of no VALE support */
1140259412Sluigiint netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
1141285349Sluigistruct nm_bridge *netmap_init_bridges2(u_int);
1142285349Sluigivoid netmap_uninit_bridges2(struct nm_bridge *, u_int);
1143285349Sluigiint netmap_init_bridges(void);
1144285349Sluigivoid netmap_uninit_bridges(void);
1145270063Sluigiint netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops);
1146270063Sluigiint netmap_bdg_config(struct nmreq *nmr);
1147259412Sluigi
1148259412Sluigi#else /* !WITH_VALE */
1149259412Sluigi#define	netmap_get_bdg_na(_1, _2, _3)	0
1150285349Sluigi#define netmap_init_bridges(_1) 0
1151285349Sluigi#define netmap_uninit_bridges()
1152259412Sluigi#define	netmap_bdg_ctl(_1, _2)	EINVAL
1153259412Sluigi#endif /* !WITH_VALE */
1154259412Sluigi
1155261909Sluigi#ifdef WITH_PIPES
1156261909Sluigi/* max number of pipes per device */
1157261909Sluigi#define NM_MAXPIPES	64	/* XXX how many? */
1158261909Sluigivoid netmap_pipe_dealloc(struct netmap_adapter *);
1159261909Sluigiint netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
1160261909Sluigi#else /* !WITH_PIPES */
1161261909Sluigi#define NM_MAXPIPES	0
1162285349Sluigi#define netmap_pipe_alloc(_1, _2) 	0
1163261909Sluigi#define netmap_pipe_dealloc(_1)
1164285349Sluigi#define netmap_get_pipe_na(nmr, _2, _3)	\
1165285349Sluigi	({ int role__ = (nmr)->nr_flags & NR_REG_MASK; \
1166285349Sluigi	   (role__ == NR_REG_PIPE_MASTER || 	       \
1167285349Sluigi	    role__ == NR_REG_PIPE_SLAVE) ? EOPNOTSUPP : 0; })
1168261909Sluigi#endif
1169261909Sluigi
1170270063Sluigi#ifdef WITH_MONITOR
1171270063Sluigiint netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
1172285349Sluigivoid netmap_monitor_stop(struct netmap_adapter *na);
1173270063Sluigi#else
1174285349Sluigi#define netmap_get_monitor_na(nmr, _2, _3) \
1175285349Sluigi	((nmr)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
1176270063Sluigi#endif
1177270063Sluigi
/*
 * Bridge namespace hooks.
 *
 * With CONFIG_NET_NS these are real functions. Without it,
 * netmap_bns_get() and netmap_bns_put() expand to nothing, and
 * netmap_bns_getbridges(b, n) stores nm_bridges in *(b) and NM_BRIDGES
 * in *(n). The arguments are parenthesized so that any pointer
 * expression (e.g. `arr + 1') can be passed, not just a plain name.
 */
#ifdef CONFIG_NET_NS
struct net *netmap_bns_get(void);
void netmap_bns_put(struct net *);
void netmap_bns_getbridges(struct nm_bridge **, u_int *);
#else
#define netmap_bns_get()
#define netmap_bns_put(_1)
#define netmap_bns_getbridges(b, n) \
	do { *(b) = nm_bridges; *(n) = NM_BRIDGES; } while (0)
#endif
1188285349Sluigi
1189259412Sluigi/* Various prototypes */
1190259412Sluigiint netmap_poll(struct cdev *dev, int events, struct thread *td);
1191259412Sluigiint netmap_init(void);
1192259412Sluigivoid netmap_fini(void);
1193259412Sluigiint netmap_get_memory(struct netmap_priv_d* p);
1194259412Sluigivoid netmap_dtor(void *data);
1195259412Sluigiint netmap_dtor_locked(struct netmap_priv_d *priv);
1196259412Sluigi
1197259412Sluigiint netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td);
1198259412Sluigi
1199259412Sluigi/* netmap_adapter creation/destruction */
1200259412Sluigi
1201260368Sluigi// #define NM_DEBUG_PUTGET 1
1202260368Sluigi
/*
 * netmap_adapter_get()/netmap_adapter_put().
 *
 * With NM_DEBUG_PUTGET defined the real functions carry a `__' prefix
 * and the public names are macros that log the adapter pointer, its name
 * and na_refcount through D() before calling them. NM_DBG(f) yields the
 * name under which function f has to be defined in either configuration.
 */
#ifdef NM_DEBUG_PUTGET

#define NM_DBG(f) __##f

void __netmap_adapter_get(struct netmap_adapter *na);
int __netmap_adapter_put(struct netmap_adapter *na);

#define netmap_adapter_get(na) 				\
	do {						\
		struct netmap_adapter *na_dbg__ = (na);	\
		D("getting %p:%s (%d)", na_dbg__, (na_dbg__)->name, (na_dbg__)->na_refcount);	\
		__netmap_adapter_get(na_dbg__);		\
	} while (0)

/* statement expression: evaluates to the value of __netmap_adapter_put() */
#define netmap_adapter_put(na)				\
	({						\
		struct netmap_adapter *na_dbg__ = (na);	\
		D("putting %p:%s (%d)", na_dbg__, (na_dbg__)->name, (na_dbg__)->na_refcount);	\
		__netmap_adapter_put(na_dbg__);		\
	})

#else /* !NM_DEBUG_PUTGET */

#define NM_DBG(f) f
void netmap_adapter_get(struct netmap_adapter *na);
int netmap_adapter_put(struct netmap_adapter *na);

#endif /* !NM_DEBUG_PUTGET */
1232259412Sluigi
1233259412Sluigi
1234260368Sluigi/*
1235260368Sluigi * module variables
1236260368Sluigi */
1237285349Sluigi#define NETMAP_BUF_BASE(na)	((na)->na_lut.lut[0].vaddr)
1238285349Sluigi#define NETMAP_BUF_SIZE(na)	((na)->na_lut.objsize)
1239260368Sluigiextern int netmap_mitigate;	// XXX not really used
1240231198Sluigiextern int netmap_no_pendintr;
1241227614Sluigiextern int netmap_verbose;	// XXX debugging
/* verbose flags: bits tested against netmap_verbose */
enum {
	NM_VERB_ON		= 0x1,		/* generic verbose */
	NM_VERB_HOST		= 0x2,		/* verbose host stack */
	NM_VERB_RXSYNC		= 0x10,		/* verbose on rxsync/txsync */
	NM_VERB_TXSYNC		= 0x20,
	NM_VERB_RXINTR		= 0x100,	/* verbose on rx/tx intr (driver) */
	NM_VERB_TXINTR		= 0x200,
	/* presumably NIC-level rxsync/txsync, judging by the names */
	NM_VERB_NIC_RXSYNC	= 0x1000,
	NM_VERB_NIC_TXSYNC	= 0x2000,
};
1252227614Sluigi
1253259412Sluigiextern int netmap_txsync_retry;
1254259412Sluigiextern int netmap_generic_mit;
1255259412Sluigiextern int netmap_generic_ringsize;
1256261909Sluigiextern int netmap_generic_rings;
1257259412Sluigi
1258227614Sluigi/*
1259228845Sluigi * NA returns a pointer to the struct netmap adapter from the ifp,
1260228845Sluigi * WNA is used to write it.
1261227614Sluigi */
1262228845Sluigi#define	NA(_ifp)	((struct netmap_adapter *)WNA(_ifp))
1263227614Sluigi
1264241719Sluigi/*
1265241719Sluigi * Macros to determine if an interface is netmap capable or netmap enabled.
1266241719Sluigi * See the magic field in struct netmap_adapter.
1267241719Sluigi */
#ifdef __FreeBSD__
/*
 * on FreeBSD just use if_capabilities and if_capenable.
 */
#define NETMAP_CAPABLE(ifp)					\
	(NA(ifp) && (ifp)->if_capabilities & IFCAP_NETMAP)

#define	NETMAP_SET_CAPABLE(ifp)					\
	((ifp)->if_capabilities |= IFCAP_NETMAP)

#else	/* linux */

/*
 * on linux:
 * NA(ifp) must be set and the magic field it points to must hold
 * the low 32 bits of the NA(ifp) address xor-ed with NETMAP_MAGIC.
 * The capenable is within the struct netmap_adapter.
 */
#define	NETMAP_MAGIC	0x52697a7a

#define NETMAP_CAPABLE(ifp)					\
	(NA(ifp) &&						\
	 ((uint32_t)(uintptr_t)NA(ifp) ^ NA(ifp)->magic) == NETMAP_MAGIC)

#define	NETMAP_SET_CAPABLE(ifp)					\
	(NA(ifp)->magic = ((uint32_t)(uintptr_t)NA(ifp)) ^ NETMAP_MAGIC)

#endif	/* linux */
1294241719Sluigi
1295238812Sluigi#ifdef __FreeBSD__
1296259412Sluigi
1297270063Sluigi/* Assigns the device IOMMU domain to an allocator.
1298270063Sluigi * Returns -ENOMEM in case the domain is different */
1299270063Sluigi#define nm_iommu_group_id(dev) (0)
1300270063Sluigi
1301260368Sluigi/* Callback invoked by the dma machinery after a successful dmamap_load */
1302230052Sluigistatic void netmap_dmamap_cb(__unused void *arg,
1303230058Sluigi    __unused bus_dma_segment_t * segs, __unused int nseg, __unused int error)
1304230052Sluigi{
1305230052Sluigi}
1306230052Sluigi
1307230052Sluigi/* bus_dmamap_load wrapper: call aforementioned function if map != NULL.
1308230052Sluigi * XXX can we do it without a callback ?
1309230052Sluigi */
1310230052Sluigistatic inline void
1311270063Sluiginetmap_load_map(struct netmap_adapter *na,
1312270063Sluigi	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
1313230052Sluigi{
1314230052Sluigi	if (map)
1315270063Sluigi		bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
1316230058Sluigi		    netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
1317230052Sluigi}
1318230052Sluigi
1319270063Sluigistatic inline void
1320270063Sluiginetmap_unload_map(struct netmap_adapter *na,
1321270063Sluigi        bus_dma_tag_t tag, bus_dmamap_t map)
1322270063Sluigi{
1323270063Sluigi	if (map)
1324270063Sluigi		bus_dmamap_unload(tag, map);
1325270063Sluigi}
1326270063Sluigi
1327230052Sluigi/* update the map when a buffer changes. */
1328230052Sluigistatic inline void
1329270063Sluiginetmap_reload_map(struct netmap_adapter *na,
1330270063Sluigi	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
1331230052Sluigi{
1332230052Sluigi	if (map) {
1333230052Sluigi		bus_dmamap_unload(tag, map);
1334270063Sluigi		bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
1335230058Sluigi		    netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
1336230052Sluigi	}
1337230052Sluigi}
1338259412Sluigi
1339238812Sluigi#else /* linux */
1340230052Sluigi
1341270063Sluigiint nm_iommu_group_id(bus_dma_tag_t dev);
1342270063Sluigi#include <linux/dma-mapping.h>
1343270063Sluigi
1344270063Sluigistatic inline void
1345270063Sluiginetmap_load_map(struct netmap_adapter *na,
1346270063Sluigi	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
1347270063Sluigi{
1348285349Sluigi	if (0 && map) {
1349285349Sluigi		*map = dma_map_single(na->pdev, buf, na->na_lut.objsize,
1350270063Sluigi				DMA_BIDIRECTIONAL);
1351270063Sluigi	}
1352270063Sluigi}
1353270063Sluigi
1354270063Sluigistatic inline void
1355270063Sluiginetmap_unload_map(struct netmap_adapter *na,
1356270063Sluigi	bus_dma_tag_t tag, bus_dmamap_t map)
1357270063Sluigi{
1358285349Sluigi	u_int sz = na->na_lut.objsize;
1359270063Sluigi
1360270063Sluigi	if (*map) {
1361270063Sluigi		dma_unmap_single(na->pdev, *map, sz,
1362270063Sluigi				DMA_BIDIRECTIONAL);
1363270063Sluigi	}
1364270063Sluigi}
1365270063Sluigi
1366270063Sluigistatic inline void
1367270063Sluiginetmap_reload_map(struct netmap_adapter *na,
1368270063Sluigi	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
1369270063Sluigi{
1370285349Sluigi	u_int sz = na->na_lut.objsize;
1371270063Sluigi
1372270063Sluigi	if (*map) {
1373270063Sluigi		dma_unmap_single(na->pdev, *map, sz,
1374270063Sluigi				DMA_BIDIRECTIONAL);
1375270063Sluigi	}
1376270063Sluigi
1377270063Sluigi	*map = dma_map_single(na->pdev, buf, sz,
1378270063Sluigi				DMA_BIDIRECTIONAL);
1379270063Sluigi}
1380270063Sluigi
/*
 * XXX How do we redefine these functions:
 *
 * on linux we need
 *	dma_map_single(&pdev->dev, virt_addr, len, direction)
 *	dma_unmap_single(&adapter->pdev->dev, phys_addr, len, direction)
 * The len can be implicit (on netmap it is NETMAP_BUF_SIZE);
 * unfortunately the direction is not, so we need to change
 * something to have a cross-platform API.
 */
1391270063Sluigi
1392238812Sluigi#if 0
1393238812Sluigi	struct e1000_buffer *buffer_info =  &tx_ring->buffer_info[l];
1394238812Sluigi	/* set time_stamp *before* dma to help avoid a possible race */
1395238812Sluigi	buffer_info->time_stamp = jiffies;
1396238812Sluigi	buffer_info->mapped_as_page = false;
1397238812Sluigi	buffer_info->length = len;
1398238812Sluigi	//buffer_info->next_to_watch = l;
1399238812Sluigi	/* reload dma map */
1400238812Sluigi	dma_unmap_single(&adapter->pdev->dev, buffer_info->dma,
1401238831Sluigi			NETMAP_BUF_SIZE, DMA_TO_DEVICE);
1402238812Sluigi	buffer_info->dma = dma_map_single(&adapter->pdev->dev,
1403238831Sluigi			addr, NETMAP_BUF_SIZE, DMA_TO_DEVICE);
1404238812Sluigi
1405238812Sluigi	if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
1406238812Sluigi		D("dma mapping error");
1407238812Sluigi		/* goto dma_error; See e1000_put_txbuf() */
1408238812Sluigi		/* XXX reset */
1409238812Sluigi	}
1410238812Sluigi	tx_desc->buffer_addr = htole64(buffer_info->dma); //XXX
1411238812Sluigi
1412238812Sluigi#endif
1413238812Sluigi
/*
 * In the linux branch bus_dmamap_sync() expands to nothing, so its
 * arguments are never evaluated.
 * A real implementation can be one of wmb() or rmb() depending on direction.
 */
#define bus_dmamap_sync(_tag, _map, _op)
1418238812Sluigi
1419238812Sluigi#endif /* linux */
1420238812Sluigi
1421257529Sluigi
1422238812Sluigi/*
1423231796Sluigi * functions to map NIC to KRING indexes (n2k) and vice versa (k2n)
1424231796Sluigi */
1425231796Sluigistatic inline int
1426232238Sluiginetmap_idx_n2k(struct netmap_kring *kr, int idx)
1427231796Sluigi{
1428232238Sluigi	int n = kr->nkr_num_slots;
1429232238Sluigi	idx += kr->nkr_hwofs;
1430232238Sluigi	if (idx < 0)
1431232238Sluigi		return idx + n;
1432232238Sluigi	else if (idx < n)
1433232238Sluigi		return idx;
1434231796Sluigi	else
1435232238Sluigi		return idx - n;
1436231796Sluigi}
1437230052Sluigi
1438231796Sluigi
1439231796Sluigistatic inline int
1440232238Sluiginetmap_idx_k2n(struct netmap_kring *kr, int idx)
1441231796Sluigi{
1442232238Sluigi	int n = kr->nkr_num_slots;
1443232238Sluigi	idx -= kr->nkr_hwofs;
1444232238Sluigi	if (idx < 0)
1445232238Sluigi		return idx + n;
1446232238Sluigi	else if (idx < n)
1447232238Sluigi		return idx;
1448231796Sluigi	else
1449232238Sluigi		return idx - n;
1450231796Sluigi}
1451231796Sluigi
1452231796Sluigi
1453234227Sluigi/* Entries of the look-up table. */
1454234227Sluigistruct lut_entry {
1455234227Sluigi	void *vaddr;		/* virtual address. */
1456250107Sluigi	vm_paddr_t paddr;	/* physical address. */
1457234227Sluigi};
1458234227Sluigi
1459234227Sluigistruct netmap_obj_pool;
1460234227Sluigi
1461227614Sluigi/*
1462229939Sluigi * NMB return the virtual address of a buffer (buffer 0 on bad index)
1463229939Sluigi * PNMB also fills the physical address
1464227614Sluigi */
1465229939Sluigistatic inline void *
1466270063SluigiNMB(struct netmap_adapter *na, struct netmap_slot *slot)
1467227614Sluigi{
1468285349Sluigi	struct lut_entry *lut = na->na_lut.lut;
1469227614Sluigi	uint32_t i = slot->buf_idx;
1470285349Sluigi	return (unlikely(i >= na->na_lut.objtotal)) ?
1471270063Sluigi		lut[0].vaddr : lut[i].vaddr;
1472227614Sluigi}
1473227614Sluigi
1474229939Sluigistatic inline void *
1475270063SluigiPNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp)
1476229939Sluigi{
1477229939Sluigi	uint32_t i = slot->buf_idx;
1478285349Sluigi	struct lut_entry *lut = na->na_lut.lut;
1479285349Sluigi	void *ret = (i >= na->na_lut.objtotal) ? lut[0].vaddr : lut[i].vaddr;
1480249659Sluigi
1481285349Sluigi	*pp = (i >= na->na_lut.objtotal) ? lut[0].paddr : lut[i].paddr;
1482229939Sluigi	return ret;
1483229939Sluigi}
1484229939Sluigi
1485259412Sluigi/* Generic version of NMB, which uses device-specific memory. */
1486259412Sluigi
1487238812Sluigi
1488257529Sluigi
1489259412Sluigivoid netmap_txsync_to_host(struct netmap_adapter *na);
1490257529Sluigi
1491259412Sluigi
1492260368Sluigi/*
1493260368Sluigi * Structure associated to each thread which registered an interface.
1494259412Sluigi *
1495259412Sluigi * The first 4 fields of this structure are written by NIOCREGIF and
1496259412Sluigi * read by poll() and NIOC?XSYNC.
1497260368Sluigi *
1498260368Sluigi * There is low contention among writers (a correct user program
1499260368Sluigi * should have none) and among writers and readers, so we use a
1500260368Sluigi * single global lock to protect the structure initialization;
1501260368Sluigi * since initialization involves the allocation of memory,
1502260368Sluigi * we reuse the memory allocator lock.
1503260368Sluigi *
1504259412Sluigi * Read access to the structure is lock free. Readers must check that
1505259412Sluigi * np_nifp is not NULL before using the other fields.
1506260368Sluigi * If np_nifp is NULL initialization has not been performed,
1507260368Sluigi * so they should return an error to userspace.
1508259412Sluigi *
1509285349Sluigi * The ref_done field (XXX ?) is used to regulate access to the refcount in the
1510259412Sluigi * memory allocator. The refcount must be incremented at most once for
1511259412Sluigi * each open("/dev/netmap"). The increment is performed by the first
1512259412Sluigi * function that calls netmap_get_memory() (currently called by
1513259412Sluigi * mmap(), NIOCGINFO and NIOCREGIF).
1514259412Sluigi * If the refcount is incremented, it is then decremented when the
1515259412Sluigi * private structure is destroyed.
1516259412Sluigi */
1517259412Sluigistruct netmap_priv_d {
1518259412Sluigi	struct netmap_if * volatile np_nifp;	/* netmap if descriptor. */
1519259412Sluigi
1520259412Sluigi	struct netmap_adapter	*np_na;
1521261909Sluigi	uint32_t	np_flags;	/* from the ioctl */
1522285349Sluigi	u_int		np_qfirst[NR_TXRX],
1523285349Sluigi			np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */
1524261909Sluigi	uint16_t	np_txpoll;	/* XXX and also np_rxpoll ? */
1525259412Sluigi
1526259412Sluigi	/* np_refcount is only used on FreeBSD */
1527260368Sluigi	int		np_refcount;	/* use with NMG_LOCK held */
1528261909Sluigi
1529261909Sluigi	/* pointers to the selinfo to be used for selrecord.
1530261909Sluigi	 * Either the local or the global one depending on the
1531261909Sluigi	 * number of rings.
1532261909Sluigi	 */
1533285349Sluigi	NM_SELINFO_T *np_si[NR_TXRX];
1534261909Sluigi	struct thread	*np_td;		/* kqueue, just debugging */
1535259412Sluigi};
1536259412Sluigi
1537270063Sluigi#ifdef WITH_MONITOR
1538259412Sluigi
1539270063Sluigistruct netmap_monitor_adapter {
1540270063Sluigi	struct netmap_adapter up;
1541270063Sluigi
1542270063Sluigi	struct netmap_priv_d priv;
1543270063Sluigi	uint32_t flags;
1544270063Sluigi};
1545270063Sluigi
1546270063Sluigi#endif /* WITH_MONITOR */
1547270063Sluigi
1548270063Sluigi
1549274362Sluigi#ifdef WITH_GENERIC
1550259412Sluigi/*
1551259412Sluigi * generic netmap emulation for devices that do not have
1552259412Sluigi * native netmap support.
1553259412Sluigi */
1554259412Sluigiint generic_netmap_attach(struct ifnet *ifp);
1555259412Sluigi
1556285349Sluigiint netmap_catch_rx(struct netmap_generic_adapter *na, int intercept);
1557259412Sluigivoid generic_rx_handler(struct ifnet *ifp, struct mbuf *m);;
1558260368Sluigivoid netmap_catch_tx(struct netmap_generic_adapter *na, int enable);
1559259412Sluigiint generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, void *addr, u_int len, u_int ring_nr);
1560259412Sluigiint generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
1561259412Sluigivoid generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);
1562285349Sluigistatic inline struct ifnet*
1563285349Sluiginetmap_generic_getifp(struct netmap_generic_adapter *gna)
1564285349Sluigi{
1565285349Sluigi        if (gna->prev)
1566285349Sluigi            return gna->prev->ifp;
1567259412Sluigi
1568285349Sluigi        return gna->up.up.ifp;
1569285349Sluigi}
1570285349Sluigi
1571270063Sluigi//#define RATE_GENERIC  /* Enables communication statistics for generic. */
1572270063Sluigi#ifdef RATE_GENERIC
1573270063Sluigivoid generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi);
1574270063Sluigi#else
1575270063Sluigi#define generic_rate(txp, txs, txi, rxp, rxs, rxi)
1576270063Sluigi#endif
1577270063Sluigi
/*
 * netmap_mitigation API. The generic adapter uses it to reduce the
 * number of interrupt requests/selwakeup delivered to clients on
 * incoming packets.
 */
void	netmap_mitigation_init(struct nm_generic_mit *mit, int idx,
	    struct netmap_adapter *na);
void	netmap_mitigation_start(struct nm_generic_mit *mit);
void	netmap_mitigation_restart(struct nm_generic_mit *mit);
int	netmap_mitigation_active(struct nm_generic_mit *mit);
void	netmap_mitigation_cleanup(struct nm_generic_mit *mit);
1589274362Sluigi#endif /* WITH_GENERIC */
1590259412Sluigi
1591261909Sluigi
1592261909Sluigi
1593261909Sluigi/* Shared declarations for the VALE switch. */
1594261909Sluigi
/*
 * Forwarding entry for a bridge.
 *
 * Each transmit queue accumulates a batch of packets into this
 * structure before forwarding. Packets to the same destination are
 * put in a list using ft_next as a link field.
 * ft_frags and ft_next are valid only on the first fragment.
 */
struct nm_bdg_fwd {
	void		*ft_buf;	/* netmap or indirect buffer */
	uint8_t		ft_frags;	/* how many fragments (only on 1st frag) */
	uint8_t		_ft_port;	/* dst port (unused) */
	uint16_t	ft_flags;	/* flags, e.g. indirect */
	uint16_t	ft_len;		/* src fragment len */
	uint16_t	ft_next;	/* next packet to same destination */
};
1609261909Sluigi
/*
 * Local copy of struct 'virtio_net_hdr' from linux.
 * The constants for the 'flags' and 'gso_type' fields are defined right
 * above the field they apply to.
 */
struct nm_vnet_hdr {
#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	/* Use csum_start, csum_offset */
#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
	uint8_t		flags;
#define VIRTIO_NET_HDR_GSO_NONE		0	/* Not a GSO frame */
#define VIRTIO_NET_HDR_GSO_TCPV4	1	/* GSO frame, IPv4 TCP (TSO) */
#define VIRTIO_NET_HDR_GSO_UDP		3	/* GSO frame, IPv4 UDP (UFO) */
#define VIRTIO_NET_HDR_GSO_TCPV6	4	/* GSO frame, IPv6 TCP */
#define VIRTIO_NET_HDR_GSO_ECN		0x80	/* TCP has ECN set */
	uint8_t		gso_type;
	uint16_t	hdr_len;
	uint16_t	gso_size;
	uint16_t	csum_start;
	uint16_t	csum_offset;
};
1626261909Sluigi
1627261909Sluigi#define WORST_CASE_GSO_HEADER	(14+40+60)  /* IPv6 + TCP */
1628261909Sluigi
/* Private definitions for IPv4, IPv6, UDP and TCP headers. */

/* IPv4 header without options (20 bytes of fixed-width fields). */
struct nm_iphdr {
	uint8_t		version_ihl;	/* version and header length, one byte */
	uint8_t		tos;
	uint16_t	tot_len;
	uint16_t	id;
	uint16_t	frag_off;
	uint8_t		ttl;
	uint8_t		protocol;
	uint16_t	check;
	uint32_t	saddr;
	uint32_t	daddr;
	/* The options start here. */
};
1644261909Sluigi
/* Private TCP header definition (fixed part, no options). */
struct nm_tcphdr {
	uint16_t	source;
	uint16_t	dest;
	uint32_t	seq;
	uint32_t	ack_seq;
	uint8_t		doff;		/* Data offset + Reserved */
	uint8_t		flags;
	uint16_t	window;
	uint16_t	check;
	uint16_t	urg_ptr;
};
1656261909Sluigi
/* Private UDP header definition: four 16-bit fields. */
struct nm_udphdr {
	uint16_t	source;
	uint16_t	dest;
	uint16_t	len;
	uint16_t	check;
};
1663261909Sluigi
/* Private IPv6 header definition, made only of byte and 16-bit fields. */
struct nm_ipv6hdr {
	uint8_t		priority_version;
	uint8_t		flow_lbl[3];

	uint16_t	payload_len;
	uint8_t		nexthdr;
	uint8_t		hop_limit;

	uint8_t		saddr[16];	/* source address, 16 bytes */
	uint8_t		daddr[16];	/* destination address, 16 bytes */
};
1675261909Sluigi
/*
 * rawsum_t holds a checksum (in host byte order) that hasn't been
 * folded yet. It is a macro alias for uint32_t.
 */
#define rawsum_t uint32_t

/* Checksum helpers; prototypes only, defined outside this header. */
rawsum_t	nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum);
uint16_t	nm_csum_ipv4(struct nm_iphdr *iph);
void		nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
		    size_t datalen, uint16_t *check);
void		nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
		    size_t datalen, uint16_t *check);
uint16_t	nm_csum_fold(rawsum_t cur_sum);
1688261909Sluigi
1689261909Sluigivoid bdg_mismatch_datapath(struct netmap_vp_adapter *na,
1690261909Sluigi			   struct netmap_vp_adapter *dst_na,
1691261909Sluigi			   struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
1692261909Sluigi			   u_int *j, u_int lim, u_int *howmany);
1693270063Sluigi
1694270063Sluigi/* persistent virtual port routines */
1695270063Sluigiint nm_vi_persist(const char *, struct ifnet **);
1696270063Sluigivoid nm_vi_detach(struct ifnet *);
1697270063Sluigivoid nm_vi_init_index(void);
1698270063Sluigi
1699227614Sluigi#endif /* _NET_NETMAP_KERN_H_ */
1700