1262153Sluigi/*
2262153Sluigi * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
3262153Sluigi *
4262153Sluigi * Redistribution and use in source and binary forms, with or without
5262153Sluigi * modification, are permitted provided that the following conditions
6262153Sluigi * are met:
7262153Sluigi *   1. Redistributions of source code must retain the above copyright
8262153Sluigi *      notice, this list of conditions and the following disclaimer.
9262153Sluigi *   2. Redistributions in binary form must reproduce the above copyright
10262153Sluigi *      notice, this list of conditions and the following disclaimer in the
11262153Sluigi *      documentation and/or other materials provided with the distribution.
12262153Sluigi *
13262153Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14262153Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15262153Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16262153Sluigi * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17262153Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18262153Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19262153Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20262153Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21262153Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22262153Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23262153Sluigi * SUCH DAMAGE.
24262153Sluigi */
25262153Sluigi
26262153Sluigi
27262153Sluigi/*
28262153Sluigi * This module implements the VALE switch for netmap
29262153Sluigi
30262153Sluigi--- VALE SWITCH ---
31262153Sluigi
32262153SluigiNMG_LOCK() serializes all modifications to switches and ports.
33262153SluigiA switch cannot be deleted until all ports are gone.
34262153Sluigi
35262153SluigiFor each switch, an SX lock (RWlock on linux) protects
36262153Sluigideletion of ports. When configuring or deleting a new port, the
37262153Sluigilock is acquired in exclusive mode (after holding NMG_LOCK).
38262153SluigiWhen forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.
42262153Sluigi
On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring; then the lock is released,
packets are copied from source to destination, and finally
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch.)
49262153Sluigi
50262153Sluigi */
51262153Sluigi
52262153Sluigi/*
53262153Sluigi * OS-specific code that is used only within this file.
54262153Sluigi * Other OS-specific code that must be accessed by drivers
55262153Sluigi * is present in netmap_kern.h
56262153Sluigi */
57262153Sluigi
58262153Sluigi#if defined(__FreeBSD__)
59262153Sluigi#include <sys/cdefs.h> /* prerequisite */
60262153Sluigi__FBSDID("$FreeBSD$");
61262153Sluigi
62262153Sluigi#include <sys/types.h>
63262153Sluigi#include <sys/errno.h>
64262153Sluigi#include <sys/param.h>	/* defines used in kernel.h */
65262153Sluigi#include <sys/kernel.h>	/* types used in module initialization */
66262153Sluigi#include <sys/conf.h>	/* cdevsw struct, UID, GID */
67262153Sluigi#include <sys/sockio.h>
68262153Sluigi#include <sys/socketvar.h>	/* struct socket */
69262153Sluigi#include <sys/malloc.h>
70262153Sluigi#include <sys/poll.h>
71262153Sluigi#include <sys/rwlock.h>
72262153Sluigi#include <sys/socket.h> /* sockaddrs */
73262153Sluigi#include <sys/selinfo.h>
74262153Sluigi#include <sys/sysctl.h>
75262153Sluigi#include <net/if.h>
76262153Sluigi#include <net/if_var.h>
77262153Sluigi#include <net/bpf.h>		/* BIOCIMMEDIATE */
78262153Sluigi#include <machine/bus.h>	/* bus_dmamap_* */
79262153Sluigi#include <sys/endian.h>
80262153Sluigi#include <sys/refcount.h>
81262153Sluigi
82262153Sluigi
/*
 * FreeBSD flavour of the per-bridge lock. Every BDG_* macro below
 * operates on the bdg_lock member of the bridge passed as (b).
 */
#define BDG_RWLOCK_T		struct rwlock

/* lifecycle */
#define BDG_RWINIT(b)		rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
#define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)

/* exclusive (writer) side */
#define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
#define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)

/* shared (reader) side */
#define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
#define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
#define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
93262153Sluigi
94262153Sluigi
95262153Sluigi#elif defined(linux)
96262153Sluigi
97262153Sluigi#include "bsd_glue.h"
98262153Sluigi
99262153Sluigi#elif defined(__APPLE__)
100262153Sluigi
101262153Sluigi#warning OSX support is only partial
102262153Sluigi#include "osx_glue.h"
103262153Sluigi
104262153Sluigi#else
105262153Sluigi
106262153Sluigi#error	Unsupported platform
107262153Sluigi
108262153Sluigi#endif /* unsupported */
109262153Sluigi
110262153Sluigi/*
111262153Sluigi * common headers
112262153Sluigi */
113262153Sluigi
114262153Sluigi#include <net/netmap.h>
115262153Sluigi#include <dev/netmap/netmap_kern.h>
116262153Sluigi#include <dev/netmap/netmap_mem2.h>
117262153Sluigi
118262153Sluigi#ifdef WITH_VALE
119262153Sluigi
120262153Sluigi/*
121262153Sluigi * system parameters (most of them in netmap_kern.h)
122262153Sluigi * NM_NAME	prefix for switch port names, default "vale"
123262153Sluigi * NM_BDG_MAXPORTS	number of ports
124262153Sluigi * NM_BRIDGES	max number of switches in the system.
125262153Sluigi *	XXX should become a sysctl or tunable
126262153Sluigi *
127262153Sluigi * Switch ports are named valeX:Y where X is the switch name and Y
128262153Sluigi * is the port. If Y matches a physical interface name, the port is
129262153Sluigi * connected to a physical device.
130262153Sluigi *
131262153Sluigi * Unlike physical interfaces, switch ports use their own memory region
132262153Sluigi * for rings and buffers.
133262153Sluigi * The virtual interfaces use per-queue lock instead of core lock.
134262153Sluigi * In the tx loop, we aggregate traffic in batches to make all operations
135262153Sluigi * faster. The batch size is bridge_batch.
136262153Sluigi */
/* limits on a single port */
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */

/* limits on a switch, and on the number of switches */
#define NM_BDG_HASH		1024	/* forwarding table entries */
#define NM_BRIDGES		8	/* number of bridges */

/* forwarding buffer geometry */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */
/* actual size of the tables */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
148262153Sluigi
149262153Sluigi
150262153Sluigi/*
151262153Sluigi * bridge_batch is set via sysctl to the max batch size to be
152262153Sluigi * used in the bridge. The actual value may be larger as the
153262153Sluigi * last packet in the block may overflow the size.
154262153Sluigi */
155262153Sluigiint bridge_batch = NM_BDG_BATCH; /* bridge batch size */
156262153SluigiSYSCTL_DECL(_dev_netmap);
157262153SluigiSYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
158262153Sluigi
159262153Sluigi
/*
 * Forward declarations; the definitions are not in this part of the file.
 */

/* virtual (VALE) port adapters */
static int bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp);
static int bdg_netmap_reg(struct netmap_adapter *na, int onoff);

/* wrappers around a NIC attached to a switch (fake ifnet first, NIC second) */
static int netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real);
static int netmap_bwrap_register(struct netmap_adapter *na, int onoff);

int kern_netmap_regif(struct nmreq *nmr);
165262153Sluigi
/*
 * One nm_bdg_q exists per output interface and is used to build the
 * list of slots destined to it.
 * bq_head and bq_tail are initialised to NM_FT_NULL by nm_alloc_bdgfwd().
 * bq_len is the number of output buffers (coalescing may happen during
 * the copy).
 */
struct nm_bdg_q {
	uint16_t bq_head, bq_tail;
	uint32_t bq_len;	/* number of buffers */
};
176262153Sluigi
/*
 * Element type of the ht[] forwarding table in struct nm_bridge.
 * XXX revise this
 */
struct nm_hash_ent {
	/* the top 2 bytes are the epoch */
	uint64_t mac;
	uint64_t ports;
};
182262153Sluigi
183262153Sluigi/*
184262153Sluigi * nm_bridge is a descriptor for a VALE switch.
185262153Sluigi * Interfaces for a bridge are all in bdg_ports[].
186262153Sluigi * The array has fixed size, an empty entry does not terminate
187262153Sluigi * the search, but lookups only occur on attach/detach so we
188262153Sluigi * don't mind if they are slow.
189262153Sluigi *
190262153Sluigi * The bridge is non blocking on the transmit ports: excess
191262153Sluigi * packets are dropped if there is no room on the output port.
192262153Sluigi *
193262153Sluigi * bdg_lock protects accesses to the bdg_ports array.
194262153Sluigi * This is a rw lock (or equivalent).
195262153Sluigi */
196262153Sluigistruct nm_bridge {
197262153Sluigi	/* XXX what is the proper alignment/layout ? */
198262153Sluigi	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
199262153Sluigi	int		bdg_namelen;
200262153Sluigi	uint32_t	bdg_active_ports; /* 0 means free */
201262153Sluigi	char		bdg_basename[IFNAMSIZ];
202262153Sluigi
203262153Sluigi	/* Indexes of active ports (up to active_ports)
204262153Sluigi	 * and all other remaining ports.
205262153Sluigi	 */
206262153Sluigi	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];
207262153Sluigi
208262153Sluigi	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
209262153Sluigi
210262153Sluigi
211262153Sluigi	/*
212262153Sluigi	 * The function to decide the destination port.
213262153Sluigi	 * It returns either of an index of the destination port,
214262153Sluigi	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
215262153Sluigi	 * forward this packet.  ring_nr is the source ring index, and the
216262153Sluigi	 * function may overwrite this value to forward this packet to a
217262153Sluigi	 * different ring index.
218262153Sluigi	 * This function must be set by netmap_bdgctl().
219262153Sluigi	 */
220262153Sluigi	bdg_lookup_fn_t nm_bdg_lookup;
221262153Sluigi
222262153Sluigi	/* the forwarding table, MAC+ports.
223262153Sluigi	 * XXX should be changed to an argument to be passed to
224262153Sluigi	 * the lookup function, and allocated on attach
225262153Sluigi	 */
226262153Sluigi	struct nm_hash_ent ht[NM_BDG_HASH];
227262153Sluigi};
228262153Sluigi
229262153Sluigi
230262153Sluigi/*
231262153Sluigi * XXX in principle nm_bridges could be created dynamically
232262153Sluigi * Right now we have a static array and deletions are protected
233262153Sluigi * by an exclusive lock.
234262153Sluigi */
235262153Sluigistruct nm_bridge nm_bridges[NM_BRIDGES];
236262153Sluigi
237262153Sluigi
/*
 * Slightly optimized copy routine: the length is rounded up to a
 * multiple of 64 bytes, which is often faster than dealing with odd
 * sizes. The caller must guarantee that both buffers have room for
 * the rounded-up length, and that they do not overlap.
 *
 * Note the argument order: source first, destination second.
 * Lengths of 1024 bytes or more are handed to memcpy() unrounded.
 * Both pointers are accessed as uint64_t, so they are presumably
 * expected to be suitably aligned -- TODO confirm with callers.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *from = _src;
	uint64_t *to = _dst;

	if (unlikely(l >= 1024)) {
		memcpy(to, from, l);
		return;
	}
	/* one iteration moves eight 64-bit words, i.e. 64 bytes */
	for (; likely(l > 0); l -= 64, from += 8, to += 8) {
		to[0] = from[0];
		to[1] = from[1];
		to[2] = from[2];
		to[3] = from[3];
		to[4] = from[4];
		to[5] = from[5];
		to[6] = from[6];
		to[7] = from[7];
	}
}
266262153Sluigi
267262153Sluigi
268262153Sluigi/*
269262153Sluigi * locate a bridge among the existing ones.
270262153Sluigi * MUST BE CALLED WITH NMG_LOCK()
271262153Sluigi *
272262153Sluigi * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
273262153Sluigi * We assume that this is called with a name of at least NM_NAME chars.
274262153Sluigi */
275262153Sluigistatic struct nm_bridge *
276262153Sluiginm_find_bridge(const char *name, int create)
277262153Sluigi{
278262153Sluigi	int i, l, namelen;
279262153Sluigi	struct nm_bridge *b = NULL;
280262153Sluigi
281262153Sluigi	NMG_LOCK_ASSERT();
282262153Sluigi
283262153Sluigi	namelen = strlen(NM_NAME);	/* base length */
284262153Sluigi	l = name ? strlen(name) : 0;		/* actual length */
285262153Sluigi	if (l < namelen) {
286262153Sluigi		D("invalid bridge name %s", name ? name : NULL);
287262153Sluigi		return NULL;
288262153Sluigi	}
289262153Sluigi	for (i = namelen + 1; i < l; i++) {
290262153Sluigi		if (name[i] == ':') {
291262153Sluigi			namelen = i;
292262153Sluigi			break;
293262153Sluigi		}
294262153Sluigi	}
295262153Sluigi	if (namelen >= IFNAMSIZ)
296262153Sluigi		namelen = IFNAMSIZ;
297262153Sluigi	ND("--- prefix is '%.*s' ---", namelen, name);
298262153Sluigi
299262153Sluigi	/* lookup the name, remember empty slot if there is one */
300262153Sluigi	for (i = 0; i < NM_BRIDGES; i++) {
301262153Sluigi		struct nm_bridge *x = nm_bridges + i;
302262153Sluigi
303262153Sluigi		if (x->bdg_active_ports == 0) {
304262153Sluigi			if (create && b == NULL)
305262153Sluigi				b = x;	/* record empty slot */
306262153Sluigi		} else if (x->bdg_namelen != namelen) {
307262153Sluigi			continue;
308262153Sluigi		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
309262153Sluigi			ND("found '%.*s' at %d", namelen, name, i);
310262153Sluigi			b = x;
311262153Sluigi			break;
312262153Sluigi		}
313262153Sluigi	}
314262153Sluigi	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
315262153Sluigi		/* initialize the bridge */
316262153Sluigi		strncpy(b->bdg_basename, name, namelen);
317262153Sluigi		ND("create new bridge %s with ports %d", b->bdg_basename,
318262153Sluigi			b->bdg_active_ports);
319262153Sluigi		b->bdg_namelen = namelen;
320262153Sluigi		b->bdg_active_ports = 0;
321262153Sluigi		for (i = 0; i < NM_BDG_MAXPORTS; i++)
322262153Sluigi			b->bdg_port_index[i] = i;
323262153Sluigi		/* set the default function */
324262153Sluigi		b->nm_bdg_lookup = netmap_bdg_learning;
325262153Sluigi		/* reset the MAC address table */
326262153Sluigi		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
327262153Sluigi	}
328262153Sluigi	return b;
329262153Sluigi}
330262153Sluigi
331262153Sluigi
332262153Sluigi/*
333262153Sluigi * Free the forwarding tables for rings attached to switch ports.
334262153Sluigi */
335262153Sluigistatic void
336262153Sluiginm_free_bdgfwd(struct netmap_adapter *na)
337262153Sluigi{
338262153Sluigi	int nrings, i;
339262153Sluigi	struct netmap_kring *kring;
340262153Sluigi
341262153Sluigi	NMG_LOCK_ASSERT();
342262153Sluigi	nrings = na->num_tx_rings;
343262153Sluigi	kring = na->tx_rings;
344262153Sluigi	for (i = 0; i < nrings; i++) {
345262153Sluigi		if (kring[i].nkr_ft) {
346262153Sluigi			free(kring[i].nkr_ft, M_DEVBUF);
347262153Sluigi			kring[i].nkr_ft = NULL; /* protect from freeing twice */
348262153Sluigi		}
349262153Sluigi	}
350262153Sluigi}
351262153Sluigi
352262153Sluigi
353262153Sluigi/*
354262153Sluigi * Allocate the forwarding tables for the rings attached to the bridge ports.
355262153Sluigi */
356262153Sluigistatic int
357262153Sluiginm_alloc_bdgfwd(struct netmap_adapter *na)
358262153Sluigi{
359262153Sluigi	int nrings, l, i, num_dstq;
360262153Sluigi	struct netmap_kring *kring;
361262153Sluigi
362262153Sluigi	NMG_LOCK_ASSERT();
363262153Sluigi	/* all port:rings + broadcast */
364262153Sluigi	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
365262153Sluigi	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
366262153Sluigi	l += sizeof(struct nm_bdg_q) * num_dstq;
367262153Sluigi	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
368262153Sluigi
369262153Sluigi	nrings = netmap_real_tx_rings(na);
370262153Sluigi	kring = na->tx_rings;
371262153Sluigi	for (i = 0; i < nrings; i++) {
372262153Sluigi		struct nm_bdg_fwd *ft;
373262153Sluigi		struct nm_bdg_q *dstq;
374262153Sluigi		int j;
375262153Sluigi
376262153Sluigi		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
377262153Sluigi		if (!ft) {
378262153Sluigi			nm_free_bdgfwd(na);
379262153Sluigi			return ENOMEM;
380262153Sluigi		}
381262153Sluigi		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
382262153Sluigi		for (j = 0; j < num_dstq; j++) {
383262153Sluigi			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
384262153Sluigi			dstq[j].bq_len = 0;
385262153Sluigi		}
386262153Sluigi		kring[i].nkr_ft = ft;
387262153Sluigi	}
388262153Sluigi	return 0;
389262153Sluigi}
390262153Sluigi
391262153Sluigi
392262153Sluigistatic void
393262153Sluiginetmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
394262153Sluigi{
395262153Sluigi	int s_hw = hw, s_sw = sw;
396262153Sluigi	int i, lim =b->bdg_active_ports;
397262153Sluigi	uint8_t tmp[NM_BDG_MAXPORTS];
398262153Sluigi
399262153Sluigi	/*
400262153Sluigi	New algorithm:
401262153Sluigi	make a copy of bdg_port_index;
402262153Sluigi	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
403262153Sluigi	in the array of bdg_port_index, replacing them with
404262153Sluigi	entries from the bottom of the array;
405262153Sluigi	decrement bdg_active_ports;
406262153Sluigi	acquire BDG_WLOCK() and copy back the array.
407262153Sluigi	 */
408262153Sluigi
409262153Sluigi	if (netmap_verbose)
410262153Sluigi		D("detach %d and %d (lim %d)", hw, sw, lim);
411262153Sluigi	/* make a copy of the list of active ports, update it,
412262153Sluigi	 * and then copy back within BDG_WLOCK().
413262153Sluigi	 */
414262153Sluigi	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
415262153Sluigi	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
416262153Sluigi		if (hw >= 0 && tmp[i] == hw) {
417262153Sluigi			ND("detach hw %d at %d", hw, i);
418262153Sluigi			lim--; /* point to last active port */
419262153Sluigi			tmp[i] = tmp[lim]; /* swap with i */
420262153Sluigi			tmp[lim] = hw;	/* now this is inactive */
421262153Sluigi			hw = -1;
422262153Sluigi		} else if (sw >= 0 && tmp[i] == sw) {
423262153Sluigi			ND("detach sw %d at %d", sw, i);
424262153Sluigi			lim--;
425262153Sluigi			tmp[i] = tmp[lim];
426262153Sluigi			tmp[lim] = sw;
427262153Sluigi			sw = -1;
428262153Sluigi		} else {
429262153Sluigi			i++;
430262153Sluigi		}
431262153Sluigi	}
432262153Sluigi	if (hw >= 0 || sw >= 0) {
433262153Sluigi		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
434262153Sluigi	}
435262153Sluigi
436262153Sluigi	BDG_WLOCK(b);
437262153Sluigi	b->bdg_ports[s_hw] = NULL;
438262153Sluigi	if (s_sw >= 0) {
439262153Sluigi		b->bdg_ports[s_sw] = NULL;
440262153Sluigi	}
441262153Sluigi	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
442262153Sluigi	b->bdg_active_ports = lim;
443262153Sluigi	BDG_WUNLOCK(b);
444262153Sluigi
445262153Sluigi	ND("now %d active ports", lim);
446262153Sluigi	if (lim == 0) {
447262153Sluigi		ND("marking bridge %s as free", b->bdg_basename);
448262153Sluigi		b->nm_bdg_lookup = NULL;
449262153Sluigi	}
450262153Sluigi}
451262153Sluigi
452262153Sluigi
453262153Sluigistatic void
454262153Sluiginetmap_adapter_vp_dtor(struct netmap_adapter *na)
455262153Sluigi{
456262153Sluigi	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
457262153Sluigi	struct nm_bridge *b = vpna->na_bdg;
458262153Sluigi	struct ifnet *ifp = na->ifp;
459262153Sluigi
460262153Sluigi	ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount);
461262153Sluigi
462262153Sluigi	if (b) {
463262153Sluigi		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
464262153Sluigi	}
465262153Sluigi
466262153Sluigi	bzero(ifp, sizeof(*ifp));
467262153Sluigi	free(ifp, M_DEVBUF);
468262153Sluigi	na->ifp = NULL;
469262153Sluigi}
470262153Sluigi
471262153Sluigi
472262153Sluigi/* Try to get a reference to a netmap adapter attached to a VALE switch.
473262153Sluigi * If the adapter is found (or is created), this function returns 0, a
474262153Sluigi * non NULL pointer is returned into *na, and the caller holds a
475262153Sluigi * reference to the adapter.
476262153Sluigi * If an adapter is not found, then no reference is grabbed and the
477262153Sluigi * function returns an error code, or 0 if there is just a VALE prefix
478262153Sluigi * mismatch. Therefore the caller holds a reference when
479262153Sluigi * (*na != NULL && return == 0).
480262153Sluigi */
481262153Sluigiint
482262153Sluiginetmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
483262153Sluigi{
484262153Sluigi	const char *name = nmr->nr_name;
485262153Sluigi	struct ifnet *ifp;
486262153Sluigi	int error = 0;
487262153Sluigi	struct netmap_adapter *ret;
488262153Sluigi	struct netmap_vp_adapter *vpna;
489262153Sluigi	struct nm_bridge *b;
490262153Sluigi	int i, j, cand = -1, cand2 = -1;
491262153Sluigi	int needed;
492262153Sluigi
493262153Sluigi	*na = NULL;     /* default return value */
494262153Sluigi
495262153Sluigi	/* first try to see if this is a bridge port. */
496262153Sluigi	NMG_LOCK_ASSERT();
497262153Sluigi	if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
498262153Sluigi		return 0;  /* no error, but no VALE prefix */
499262153Sluigi	}
500262153Sluigi
501262153Sluigi	b = nm_find_bridge(name, create);
502262153Sluigi	if (b == NULL) {
503262153Sluigi		D("no bridges available for '%s'", name);
504262153Sluigi		return (create ? ENOMEM : ENXIO);
505262153Sluigi	}
506262153Sluigi
507262153Sluigi	/* Now we are sure that name starts with the bridge's name,
508262153Sluigi	 * lookup the port in the bridge. We need to scan the entire
509262153Sluigi	 * list. It is not important to hold a WLOCK on the bridge
510262153Sluigi	 * during the search because NMG_LOCK already guarantees
511262153Sluigi	 * that there are no other possible writers.
512262153Sluigi	 */
513262153Sluigi
514262153Sluigi	/* lookup in the local list of ports */
515262153Sluigi	for (j = 0; j < b->bdg_active_ports; j++) {
516262153Sluigi		i = b->bdg_port_index[j];
517262153Sluigi		vpna = b->bdg_ports[i];
518262153Sluigi		// KASSERT(na != NULL);
519262153Sluigi		ifp = vpna->up.ifp;
520262153Sluigi		/* XXX make sure the name only contains one : */
521262153Sluigi		if (!strcmp(NM_IFPNAME(ifp), name)) {
522262153Sluigi			netmap_adapter_get(&vpna->up);
523262153Sluigi			ND("found existing if %s refs %d", name,
524262153Sluigi				vpna->na_bdg_refcount);
525262153Sluigi			*na = (struct netmap_adapter *)vpna;
526262153Sluigi			return 0;
527262153Sluigi		}
528262153Sluigi	}
529262153Sluigi	/* not found, should we create it? */
530262153Sluigi	if (!create)
531262153Sluigi		return ENXIO;
532262153Sluigi	/* yes we should, see if we have space to attach entries */
533262153Sluigi	needed = 2; /* in some cases we only need 1 */
534262153Sluigi	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
535262153Sluigi		D("bridge full %d, cannot create new port", b->bdg_active_ports);
536262153Sluigi		return ENOMEM;
537262153Sluigi	}
538262153Sluigi	/* record the next two ports available, but do not allocate yet */
539262153Sluigi	cand = b->bdg_port_index[b->bdg_active_ports];
540262153Sluigi	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
541262153Sluigi	ND("+++ bridge %s port %s used %d avail %d %d",
542262153Sluigi		b->bdg_basename, name, b->bdg_active_ports, cand, cand2);
543262153Sluigi
544262153Sluigi	/*
545262153Sluigi	 * try see if there is a matching NIC with this name
546262153Sluigi	 * (after the bridge's name)
547262153Sluigi	 */
548262153Sluigi	ifp = ifunit_ref(name + b->bdg_namelen + 1);
549262153Sluigi	if (!ifp) { /* this is a virtual port */
550262153Sluigi		if (nmr->nr_cmd) {
551262153Sluigi			/* nr_cmd must be 0 for a virtual port */
552262153Sluigi			return EINVAL;
553262153Sluigi		}
554262153Sluigi
555262153Sluigi	 	/* create a struct ifnet for the new port.
556262153Sluigi		 * need M_NOWAIT as we are under nma_lock
557262153Sluigi		 */
558262153Sluigi		ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
559262153Sluigi		if (!ifp)
560262153Sluigi			return ENOMEM;
561262153Sluigi
562262153Sluigi		strcpy(ifp->if_xname, name);
563262153Sluigi		/* bdg_netmap_attach creates a struct netmap_adapter */
564262153Sluigi		error = bdg_netmap_attach(nmr, ifp);
565262153Sluigi		if (error) {
566262153Sluigi			D("error %d", error);
567262153Sluigi			free(ifp, M_DEVBUF);
568262153Sluigi			return error;
569262153Sluigi		}
570262153Sluigi		ret = NA(ifp);
571262153Sluigi		cand2 = -1;	/* only need one port */
572262153Sluigi	} else {  /* this is a NIC */
573262153Sluigi		struct ifnet *fake_ifp;
574262153Sluigi
575262153Sluigi		error = netmap_get_hw_na(ifp, &ret);
576262153Sluigi		if (error || ret == NULL)
577262153Sluigi			goto out;
578262153Sluigi
579262153Sluigi		/* make sure the NIC is not already in use */
580262153Sluigi		if (NETMAP_OWNED_BY_ANY(ret)) {
581262153Sluigi			D("NIC %s busy, cannot attach to bridge",
582262153Sluigi				NM_IFPNAME(ifp));
583262153Sluigi			error = EBUSY;
584262153Sluigi			goto out;
585262153Sluigi		}
586262153Sluigi		/* create a fake interface */
587262153Sluigi		fake_ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
588262153Sluigi		if (!fake_ifp) {
589262153Sluigi			error = ENOMEM;
590262153Sluigi			goto out;
591262153Sluigi		}
592262153Sluigi		strcpy(fake_ifp->if_xname, name);
593262153Sluigi		error = netmap_bwrap_attach(fake_ifp, ifp);
594262153Sluigi		if (error) {
595262153Sluigi			free(fake_ifp, M_DEVBUF);
596262153Sluigi			goto out;
597262153Sluigi		}
598262153Sluigi		ret = NA(fake_ifp);
599262153Sluigi		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
600262153Sluigi			cand2 = -1; /* only need one port */
601262153Sluigi		if_rele(ifp);
602262153Sluigi	}
603262153Sluigi	vpna = (struct netmap_vp_adapter *)ret;
604262153Sluigi
605262153Sluigi	BDG_WLOCK(b);
606262153Sluigi	vpna->bdg_port = cand;
607262153Sluigi	ND("NIC  %p to bridge port %d", vpna, cand);
608262153Sluigi	/* bind the port to the bridge (virtual ports are not active) */
609262153Sluigi	b->bdg_ports[cand] = vpna;
610262153Sluigi	vpna->na_bdg = b;
611262153Sluigi	b->bdg_active_ports++;
612262153Sluigi	if (cand2 >= 0) {
613262153Sluigi		struct netmap_vp_adapter *hostna = vpna + 1;
614262153Sluigi		/* also bind the host stack to the bridge */
615262153Sluigi		b->bdg_ports[cand2] = hostna;
616262153Sluigi		hostna->bdg_port = cand2;
617262153Sluigi		hostna->na_bdg = b;
618262153Sluigi		b->bdg_active_ports++;
619262153Sluigi		ND("host %p to bridge port %d", hostna, cand2);
620262153Sluigi	}
621262153Sluigi	ND("if %s refs %d", name, vpna->up.na_refcount);
622262153Sluigi	BDG_WUNLOCK(b);
623262153Sluigi	*na = ret;
624262153Sluigi	netmap_adapter_get(ret);
625262153Sluigi	return 0;
626262153Sluigi
627262153Sluigiout:
628262153Sluigi	if_rele(ifp);
629262153Sluigi
630262153Sluigi	return error;
631262153Sluigi}
632262153Sluigi
633262153Sluigi
634262153Sluigi/* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
635262153Sluigistatic int
636262153Sluiginm_bdg_attach(struct nmreq *nmr)
637262153Sluigi{
638262153Sluigi	struct netmap_adapter *na;
639262153Sluigi	struct netmap_if *nifp;
640262153Sluigi	struct netmap_priv_d *npriv;
641262153Sluigi	struct netmap_bwrap_adapter *bna;
642262153Sluigi	int error;
643262153Sluigi
644262153Sluigi	npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
645262153Sluigi	if (npriv == NULL)
646262153Sluigi		return ENOMEM;
647262153Sluigi
648262153Sluigi	NMG_LOCK();
649262153Sluigi
650262153Sluigi	error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
651262153Sluigi	if (error) /* no device, or another bridge or user owns the device */
652262153Sluigi		goto unlock_exit;
653262153Sluigi
654262153Sluigi	if (na == NULL) { /* VALE prefix missing */
655262153Sluigi		error = EINVAL;
656262153Sluigi		goto unlock_exit;
657262153Sluigi	}
658262153Sluigi
659262153Sluigi	if (na->active_fds > 0) { /* already registered */
660262153Sluigi		error = EBUSY;
661262153Sluigi		goto unref_exit;
662262153Sluigi	}
663262153Sluigi
664262153Sluigi	nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error);
665262153Sluigi	if (!nifp) {
666262153Sluigi		goto unref_exit;
667262153Sluigi	}
668262153Sluigi
669262153Sluigi	bna = (struct netmap_bwrap_adapter*)na;
670262153Sluigi	bna->na_kpriv = npriv;
671262153Sluigi	NMG_UNLOCK();
672262153Sluigi	ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp));
673262153Sluigi	return 0;
674262153Sluigi
675262153Sluigiunref_exit:
676262153Sluigi	netmap_adapter_put(na);
677262153Sluigiunlock_exit:
678262153Sluigi	NMG_UNLOCK();
679262153Sluigi	bzero(npriv, sizeof(*npriv));
680262153Sluigi	free(npriv, M_DEVBUF);
681262153Sluigi	return error;
682262153Sluigi}
683262153Sluigi
684262153Sluigi
685262153Sluigistatic int
686262153Sluiginm_bdg_detach(struct nmreq *nmr)
687262153Sluigi{
688262153Sluigi	struct netmap_adapter *na;
689262153Sluigi	int error;
690262153Sluigi	struct netmap_bwrap_adapter *bna;
691262153Sluigi	int last_instance;
692262153Sluigi
693262153Sluigi	NMG_LOCK();
694262153Sluigi	error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
695262153Sluigi	if (error) { /* no device, or another bridge or user owns the device */
696262153Sluigi		goto unlock_exit;
697262153Sluigi	}
698262153Sluigi
699262153Sluigi	if (na == NULL) { /* VALE prefix missing */
700262153Sluigi		error = EINVAL;
701262153Sluigi		goto unlock_exit;
702262153Sluigi	}
703262153Sluigi
704262153Sluigi	bna = (struct netmap_bwrap_adapter *)na;
705262153Sluigi
706262153Sluigi	if (na->active_fds == 0) { /* not registered */
707262153Sluigi		error = EINVAL;
708262153Sluigi		goto unref_exit;
709262153Sluigi	}
710262153Sluigi
711262153Sluigi	last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */
712262153Sluigi	if (!last_instance) {
713262153Sluigi		D("--- error, trying to detach an entry with active mmaps");
714262153Sluigi		error = EINVAL;
715262153Sluigi	} else {
716262153Sluigi		struct netmap_priv_d *npriv = bna->na_kpriv;
717262153Sluigi
718262153Sluigi		bna->na_kpriv = NULL;
719262153Sluigi		D("deleting priv");
720262153Sluigi
721262153Sluigi		bzero(npriv, sizeof(*npriv));
722262153Sluigi		free(npriv, M_DEVBUF);
723262153Sluigi	}
724262153Sluigi
725262153Sluigiunref_exit:
726262153Sluigi	netmap_adapter_put(na);
727262153Sluigiunlock_exit:
728262153Sluigi	NMG_UNLOCK();
729262153Sluigi	return error;
730262153Sluigi
731262153Sluigi}
732262153Sluigi
733262153Sluigi
734262153Sluigi/* exported to kernel callers, e.g. OVS ?
735262153Sluigi * Entry point.
736262153Sluigi * Called without NMG_LOCK.
737262153Sluigi */
738262153Sluigiint
739262153Sluiginetmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
740262153Sluigi{
741262153Sluigi	struct nm_bridge *b;
742262153Sluigi	struct netmap_adapter *na;
743262153Sluigi	struct netmap_vp_adapter *vpna;
744262153Sluigi	struct ifnet *iter;
745262153Sluigi	char *name = nmr->nr_name;
746262153Sluigi	int cmd = nmr->nr_cmd, namelen = strlen(name);
747262153Sluigi	int error = 0, i, j;
748262153Sluigi
749262153Sluigi	switch (cmd) {
750262153Sluigi	case NETMAP_BDG_ATTACH:
751262153Sluigi		error = nm_bdg_attach(nmr);
752262153Sluigi		break;
753262153Sluigi
754262153Sluigi	case NETMAP_BDG_DETACH:
755262153Sluigi		error = nm_bdg_detach(nmr);
756262153Sluigi		break;
757262153Sluigi
758262153Sluigi	case NETMAP_BDG_LIST:
759262153Sluigi		/* this is used to enumerate bridges and ports */
760262153Sluigi		if (namelen) { /* look up indexes of bridge and port */
761262153Sluigi			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
762262153Sluigi				error = EINVAL;
763262153Sluigi				break;
764262153Sluigi			}
765262153Sluigi			NMG_LOCK();
766262153Sluigi			b = nm_find_bridge(name, 0 /* don't create */);
767262153Sluigi			if (!b) {
768262153Sluigi				error = ENOENT;
769262153Sluigi				NMG_UNLOCK();
770262153Sluigi				break;
771262153Sluigi			}
772262153Sluigi
773262153Sluigi			error = ENOENT;
774262153Sluigi			for (j = 0; j < b->bdg_active_ports; j++) {
775262153Sluigi				i = b->bdg_port_index[j];
776262153Sluigi				vpna = b->bdg_ports[i];
777262153Sluigi				if (vpna == NULL) {
778262153Sluigi					D("---AAAAAAAAARGH-------");
779262153Sluigi					continue;
780262153Sluigi				}
781262153Sluigi				iter = vpna->up.ifp;
782262153Sluigi				/* the former and the latter identify a
783262153Sluigi				 * virtual port and a NIC, respectively
784262153Sluigi				 */
785262153Sluigi				if (!strcmp(iter->if_xname, name)) {
786262153Sluigi					/* bridge index */
787262153Sluigi					nmr->nr_arg1 = b - nm_bridges;
788262153Sluigi					nmr->nr_arg2 = i; /* port index */
789262153Sluigi					error = 0;
790262153Sluigi					break;
791262153Sluigi				}
792262153Sluigi			}
793262153Sluigi			NMG_UNLOCK();
794262153Sluigi		} else {
795262153Sluigi			/* return the first non-empty entry starting from
796262153Sluigi			 * bridge nr_arg1 and port nr_arg2.
797262153Sluigi			 *
798262153Sluigi			 * Users can detect the end of the same bridge by
799262153Sluigi			 * seeing the new and old value of nr_arg1, and can
800262153Sluigi			 * detect the end of all the bridge by error != 0
801262153Sluigi			 */
802262153Sluigi			i = nmr->nr_arg1;
803262153Sluigi			j = nmr->nr_arg2;
804262153Sluigi
805262153Sluigi			NMG_LOCK();
806262153Sluigi			for (error = ENOENT; i < NM_BRIDGES; i++) {
807262153Sluigi				b = nm_bridges + i;
808262153Sluigi				if (j >= b->bdg_active_ports) {
809262153Sluigi					j = 0; /* following bridges scan from 0 */
810262153Sluigi					continue;
811262153Sluigi				}
812262153Sluigi				nmr->nr_arg1 = i;
813262153Sluigi				nmr->nr_arg2 = j;
814262153Sluigi				j = b->bdg_port_index[j];
815262153Sluigi				vpna = b->bdg_ports[j];
816262153Sluigi				iter = vpna->up.ifp;
817262153Sluigi				strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
818262153Sluigi				error = 0;
819262153Sluigi				break;
820262153Sluigi			}
821262153Sluigi			NMG_UNLOCK();
822262153Sluigi		}
823262153Sluigi		break;
824262153Sluigi
825262153Sluigi	case NETMAP_BDG_LOOKUP_REG:
826262153Sluigi		/* register a lookup function to the given bridge.
827262153Sluigi		 * nmr->nr_name may be just bridge's name (including ':'
828262153Sluigi		 * if it is not just NM_NAME).
829262153Sluigi		 */
830262153Sluigi		if (!func) {
831262153Sluigi			error = EINVAL;
832262153Sluigi			break;
833262153Sluigi		}
834262153Sluigi		NMG_LOCK();
835262153Sluigi		b = nm_find_bridge(name, 0 /* don't create */);
836262153Sluigi		if (!b) {
837262153Sluigi			error = EINVAL;
838262153Sluigi		} else {
839262153Sluigi			b->nm_bdg_lookup = func;
840262153Sluigi		}
841262153Sluigi		NMG_UNLOCK();
842262153Sluigi		break;
843262153Sluigi
844262153Sluigi	case NETMAP_BDG_VNET_HDR:
845262153Sluigi		/* Valid lengths for the virtio-net header are 0 (no header),
846262153Sluigi		   10 and 12. */
847262153Sluigi		if (nmr->nr_arg1 != 0 &&
848262153Sluigi			nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) &&
849262153Sluigi				nmr->nr_arg1 != 12) {
850262153Sluigi			error = EINVAL;
851262153Sluigi			break;
852262153Sluigi		}
853262153Sluigi		NMG_LOCK();
854262153Sluigi		error = netmap_get_bdg_na(nmr, &na, 0);
855262153Sluigi		if (na && !error) {
856262153Sluigi			vpna = (struct netmap_vp_adapter *)na;
857262153Sluigi			vpna->virt_hdr_len = nmr->nr_arg1;
858262153Sluigi			if (vpna->virt_hdr_len)
859262153Sluigi				vpna->mfs = NETMAP_BDG_BUF_SIZE(na->nm_mem);
860262153Sluigi			D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna);
861262153Sluigi			netmap_adapter_put(na);
862262153Sluigi		}
863262153Sluigi		NMG_UNLOCK();
864262153Sluigi		break;
865262153Sluigi
866262153Sluigi	default:
867262153Sluigi		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
868262153Sluigi		error = EINVAL;
869262153Sluigi		break;
870262153Sluigi	}
871262153Sluigi	return error;
872262153Sluigi}
873262153Sluigi
874262153Sluigistatic int
875262153Sluiginetmap_vp_krings_create(struct netmap_adapter *na)
876262153Sluigi{
877262153Sluigi	u_int tailroom;
878262153Sluigi	int error, i;
879262153Sluigi	uint32_t *leases;
880262153Sluigi	u_int nrx = netmap_real_rx_rings(na);
881262153Sluigi
882262153Sluigi	/*
883262153Sluigi	 * Leases are attached to RX rings on vale ports
884262153Sluigi	 */
885262153Sluigi	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
886262153Sluigi
887262153Sluigi	error = netmap_krings_create(na, tailroom);
888262153Sluigi	if (error)
889262153Sluigi		return error;
890262153Sluigi
891262153Sluigi	leases = na->tailroom;
892262153Sluigi
893262153Sluigi	for (i = 0; i < nrx; i++) { /* Receive rings */
894262153Sluigi		na->rx_rings[i].nkr_leases = leases;
895262153Sluigi		leases += na->num_rx_desc;
896262153Sluigi	}
897262153Sluigi
898262153Sluigi	error = nm_alloc_bdgfwd(na);
899262153Sluigi	if (error) {
900262153Sluigi		netmap_krings_delete(na);
901262153Sluigi		return error;
902262153Sluigi	}
903262153Sluigi
904262153Sluigi	return 0;
905262153Sluigi}
906262153Sluigi
907262153Sluigi
/*
 * Undo netmap_vp_krings_create(): release the forwarding work area
 * first, then the krings.
 */
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);
	netmap_krings_delete(na);
}
914262153Sluigi
915262153Sluigi
916262153Sluigistatic int
917262153Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
918262153Sluigi	struct netmap_vp_adapter *na, u_int ring_nr);
919262153Sluigi
920262153Sluigi
921262153Sluigi/*
922262153Sluigi * Grab packets from a kring, move them into the ft structure
923262153Sluigi * associated to the tx (input) port. Max one instance per port,
924262153Sluigi * filtered on input (ioctl, poll or XXX).
925262153Sluigi * Returns the next position in the ring.
926262153Sluigi */
927262153Sluigistatic int
928262153Sluiginm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
929262153Sluigi	struct netmap_kring *kring, u_int end)
930262153Sluigi{
931262153Sluigi	struct netmap_ring *ring = kring->ring;
932262153Sluigi	struct nm_bdg_fwd *ft;
933262153Sluigi	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
934262153Sluigi	u_int ft_i = 0;	/* start from 0 */
935262153Sluigi	u_int frags = 1; /* how many frags ? */
936262153Sluigi	struct nm_bridge *b = na->na_bdg;
937262153Sluigi
938262153Sluigi	/* To protect against modifications to the bridge we acquire a
939262153Sluigi	 * shared lock, waiting if we can sleep (if the source port is
940262153Sluigi	 * attached to a user process) or with a trylock otherwise (NICs).
941262153Sluigi	 */
942262153Sluigi	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
943262153Sluigi	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
944262153Sluigi		BDG_RLOCK(b);
945262153Sluigi	else if (!BDG_RTRYLOCK(b))
946262153Sluigi		return 0;
947262153Sluigi	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
948262153Sluigi	ft = kring->nkr_ft;
949262153Sluigi
950262153Sluigi	for (; likely(j != end); j = nm_next(j, lim)) {
951262153Sluigi		struct netmap_slot *slot = &ring->slot[j];
952262153Sluigi		char *buf;
953262153Sluigi
954262153Sluigi		ft[ft_i].ft_len = slot->len;
955262153Sluigi		ft[ft_i].ft_flags = slot->flags;
956262153Sluigi
957262153Sluigi		ND("flags is 0x%x", slot->flags);
958262153Sluigi		/* this slot goes into a list so initialize the link field */
959262153Sluigi		ft[ft_i].ft_next = NM_FT_NULL;
960262153Sluigi		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
961262153Sluigi			(void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
962262153Sluigi		__builtin_prefetch(buf);
963262153Sluigi		++ft_i;
964262153Sluigi		if (slot->flags & NS_MOREFRAG) {
965262153Sluigi			frags++;
966262153Sluigi			continue;
967262153Sluigi		}
968262153Sluigi		if (unlikely(netmap_verbose && frags > 1))
969262153Sluigi			RD(5, "%d frags at %d", frags, ft_i - frags);
970262153Sluigi		ft[ft_i - frags].ft_frags = frags;
971262153Sluigi		frags = 1;
972262153Sluigi		if (unlikely((int)ft_i >= bridge_batch))
973262153Sluigi			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
974262153Sluigi	}
975262153Sluigi	if (frags > 1) {
976262153Sluigi		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
977262153Sluigi		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
978262153Sluigi		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
979262153Sluigi		ft[ft_i - frags].ft_frags = frags - 1;
980262153Sluigi	}
981262153Sluigi	if (ft_i)
982262153Sluigi		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
983262153Sluigi	BDG_RUNLOCK(b);
984262153Sluigi	return j;
985262153Sluigi}
986262153Sluigi
987262153Sluigi
988262153Sluigi/* ----- FreeBSD if_bridge hash function ------- */
989262153Sluigi
/*
 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
 *
 * http://www.burtleburtle.net/bob/hash/spooky.html
 */
#define mix(a, b, c)                                                    \
do {                                                                    \
        a -= b; a -= c; a ^= (c >> 13);                                 \
        b -= c; b -= a; b ^= (a << 8);                                  \
        c -= a; c -= b; c ^= (b >> 13);                                 \
        a -= b; a -= c; a ^= (c >> 12);                                 \
        b -= c; b -= a; b ^= (a << 16);                                 \
        c -= a; c -= b; c ^= (b >> 5);                                  \
        a -= b; a -= c; a ^= (c >> 3);                                  \
        b -= c; b -= a; b ^= (a << 10);                                 \
        c -= a; c -= b; c ^= (b >> 15);                                 \
} while (/*CONSTCOND*/0)


/*
 * Hash the 6 bytes at 'addr' into an index in the range
 * [0, NM_BDG_HASH - 1]; the result is masked with NM_BDG_HASH - 1.
 *
 * Each byte is widened to uint32_t before being shifted: a plain
 * uint8_t is promoted to (signed) int, and shifting a value >= 0x80
 * left by 24 would overflow it, which is undefined behaviour.
 */
static __inline uint32_t
nm_bridge_rthash(const uint8_t *addr)
{
        uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hash key

        b += (uint32_t)addr[5] << 8;
        b += (uint32_t)addr[4];
        a += (uint32_t)addr[3] << 24;
        a += (uint32_t)addr[2] << 16;
        a += (uint32_t)addr[1] << 8;
        a += (uint32_t)addr[0];

        mix(a, b, c);
#define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
        return (c & BRIDGE_RTHASH_MASK);
}

#undef mix
1028262153Sluigi
1029262153Sluigi
1030262153Sluigistatic int
1031262153Sluigibdg_netmap_reg(struct netmap_adapter *na, int onoff)
1032262153Sluigi{
1033262153Sluigi	struct netmap_vp_adapter *vpna =
1034262153Sluigi		(struct netmap_vp_adapter*)na;
1035262153Sluigi	struct ifnet *ifp = na->ifp;
1036262153Sluigi
1037262153Sluigi	/* the interface is already attached to the bridge,
1038262153Sluigi	 * so we only need to toggle IFCAP_NETMAP.
1039262153Sluigi	 */
1040262153Sluigi	BDG_WLOCK(vpna->na_bdg);
1041262153Sluigi	if (onoff) {
1042262153Sluigi		ifp->if_capenable |= IFCAP_NETMAP;
1043262153Sluigi	} else {
1044262153Sluigi		ifp->if_capenable &= ~IFCAP_NETMAP;
1045262153Sluigi	}
1046262153Sluigi	BDG_WUNLOCK(vpna->na_bdg);
1047262153Sluigi	return 0;
1048262153Sluigi}
1049262153Sluigi
1050262153Sluigi
1051262153Sluigi/*
1052262153Sluigi * Lookup function for a learning bridge.
1053262153Sluigi * Update the hash table with the source address,
1054262153Sluigi * and then returns the destination port index, and the
1055262153Sluigi * ring in *dst_ring (at the moment, always use ring 0)
1056262153Sluigi */
1057262153Sluigiu_int
1058262153Sluiginetmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
1059262153Sluigi		struct netmap_vp_adapter *na)
1060262153Sluigi{
1061262153Sluigi	struct nm_hash_ent *ht = na->na_bdg->ht;
1062262153Sluigi	uint32_t sh, dh;
1063262153Sluigi	u_int dst, mysrc = na->bdg_port;
1064262153Sluigi	uint64_t smac, dmac;
1065262153Sluigi
1066262153Sluigi	if (buf_len < 14) {
1067262153Sluigi		D("invalid buf length %d", buf_len);
1068262153Sluigi		return NM_BDG_NOPORT;
1069262153Sluigi	}
1070262153Sluigi	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1071262153Sluigi	smac = le64toh(*(uint64_t *)(buf + 4));
1072262153Sluigi	smac >>= 16;
1073262153Sluigi
1074262153Sluigi	/*
1075262153Sluigi	 * The hash is somewhat expensive, there might be some
1076262153Sluigi	 * worthwhile optimizations here.
1077262153Sluigi	 */
1078262153Sluigi	if ((buf[6] & 1) == 0) { /* valid src */
1079262153Sluigi		uint8_t *s = buf+6;
1080262153Sluigi		sh = nm_bridge_rthash(s); // XXX hash of source
1081262153Sluigi		/* update source port forwarding entry */
1082262153Sluigi		ht[sh].mac = smac;	/* XXX expire ? */
1083262153Sluigi		ht[sh].ports = mysrc;
1084262153Sluigi		if (netmap_verbose)
1085262153Sluigi		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1086262153Sluigi			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1087262153Sluigi	}
1088262153Sluigi	dst = NM_BDG_BROADCAST;
1089262153Sluigi	if ((buf[0] & 1) == 0) { /* unicast */
1090262153Sluigi		dh = nm_bridge_rthash(buf); // XXX hash of dst
1091262153Sluigi		if (ht[dh].mac == dmac) {	/* found dst */
1092262153Sluigi			dst = ht[dh].ports;
1093262153Sluigi		}
1094262153Sluigi		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1095262153Sluigi	}
1096262153Sluigi	*dst_ring = 0;
1097262153Sluigi	return dst;
1098262153Sluigi}
1099262153Sluigi
1100262153Sluigi
1101262153Sluigi/*
1102262153Sluigi * Available space in the ring. Only used in VALE code
1103262153Sluigi * and only with is_rx = 1
1104262153Sluigi */
1105262153Sluigistatic inline uint32_t
1106262153Sluiginm_kr_space(struct netmap_kring *k, int is_rx)
1107262153Sluigi{
1108262153Sluigi	int space;
1109262153Sluigi
1110262153Sluigi	if (is_rx) {
1111262153Sluigi		int busy = k->nkr_hwlease - k->nr_hwcur;
1112262153Sluigi		if (busy < 0)
1113262153Sluigi			busy += k->nkr_num_slots;
1114262153Sluigi		space = k->nkr_num_slots - 1 - busy;
1115262153Sluigi	} else {
1116262153Sluigi		/* XXX never used in this branch */
1117262153Sluigi		space = k->nr_hwtail - k->nkr_hwlease;
1118262153Sluigi		if (space < 0)
1119262153Sluigi			space += k->nkr_num_slots;
1120262153Sluigi	}
1121262153Sluigi#if 0
1122262153Sluigi	// sanity check
1123262153Sluigi	if (k->nkr_hwlease >= k->nkr_num_slots ||
1124262153Sluigi		k->nr_hwcur >= k->nkr_num_slots ||
1125262153Sluigi		k->nr_tail >= k->nkr_num_slots ||
1126262153Sluigi		busy < 0 ||
1127262153Sluigi		busy >= k->nkr_num_slots) {
1128262153Sluigi		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
1129262153Sluigi			k->nkr_lease_idx, k->nkr_num_slots);
1130262153Sluigi	}
1131262153Sluigi#endif
1132262153Sluigi	return space;
1133262153Sluigi}
1134262153Sluigi
1135262153Sluigi
1136262153Sluigi
1137262153Sluigi
1138262153Sluigi/* make a lease on the kring for N positions. return the
1139262153Sluigi * lease index
1140262153Sluigi * XXX only used in VALE code and with is_rx = 1
1141262153Sluigi */
1142262153Sluigistatic inline uint32_t
1143262153Sluiginm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
1144262153Sluigi{
1145262153Sluigi	uint32_t lim = k->nkr_num_slots - 1;
1146262153Sluigi	uint32_t lease_idx = k->nkr_lease_idx;
1147262153Sluigi
1148262153Sluigi	k->nkr_leases[lease_idx] = NR_NOSLOT;
1149262153Sluigi	k->nkr_lease_idx = nm_next(lease_idx, lim);
1150262153Sluigi
1151262153Sluigi	if (n > nm_kr_space(k, is_rx)) {
1152262153Sluigi		D("invalid request for %d slots", n);
1153262153Sluigi		panic("x");
1154262153Sluigi	}
1155262153Sluigi	/* XXX verify that there are n slots */
1156262153Sluigi	k->nkr_hwlease += n;
1157262153Sluigi	if (k->nkr_hwlease > lim)
1158262153Sluigi		k->nkr_hwlease -= lim + 1;
1159262153Sluigi
1160262153Sluigi	if (k->nkr_hwlease >= k->nkr_num_slots ||
1161262153Sluigi		k->nr_hwcur >= k->nkr_num_slots ||
1162262153Sluigi		k->nr_hwtail >= k->nkr_num_slots ||
1163262153Sluigi		k->nkr_lease_idx >= k->nkr_num_slots) {
1164262153Sluigi		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
1165262153Sluigi			k->na->ifp->if_xname,
1166262153Sluigi			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
1167262153Sluigi			k->nkr_lease_idx, k->nkr_num_slots);
1168262153Sluigi	}
1169262153Sluigi	return lease_idx;
1170262153Sluigi}
1171262153Sluigi
1172262153Sluigi/*
1173262153Sluigi * This flush routine supports only unicast and broadcast but a large
1174262153Sluigi * number of ports, and lets us replace the learn and dispatch functions.
1175262153Sluigi */
1176262153Sluigiint
1177262153Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1178262153Sluigi		u_int ring_nr)
1179262153Sluigi{
1180262153Sluigi	struct nm_bdg_q *dst_ents, *brddst;
1181262153Sluigi	uint16_t num_dsts = 0, *dsts;
1182262153Sluigi	struct nm_bridge *b = na->na_bdg;
1183262153Sluigi	u_int i, j, me = na->bdg_port;
1184262153Sluigi
1185262153Sluigi	/*
1186262153Sluigi	 * The work area (pointed by ft) is followed by an array of
1187262153Sluigi	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1188262153Sluigi	 * queues per port plus one for the broadcast traffic.
1189262153Sluigi	 * Then we have an array of destination indexes.
1190262153Sluigi	 */
1191262153Sluigi	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1192262153Sluigi	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1193262153Sluigi
1194262153Sluigi	/* first pass: find a destination for each packet in the batch */
1195262153Sluigi	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1196262153Sluigi		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1197262153Sluigi		uint16_t dst_port, d_i;
1198262153Sluigi		struct nm_bdg_q *d;
1199262153Sluigi		uint8_t *buf = ft[i].ft_buf;
1200262153Sluigi		u_int len = ft[i].ft_len;
1201262153Sluigi
1202262153Sluigi		ND("slot %d frags %d", i, ft[i].ft_frags);
1203262153Sluigi		/* Drop the packet if the virtio-net header is not into the first
1204262153Sluigi		   fragment nor at the very beginning of the second. */
1205262153Sluigi		if (unlikely(na->virt_hdr_len > len))
1206262153Sluigi			continue;
1207262153Sluigi		if (len == na->virt_hdr_len) {
1208262153Sluigi			buf = ft[i+1].ft_buf;
1209262153Sluigi			len = ft[i+1].ft_len;
1210262153Sluigi		} else {
1211262153Sluigi			buf += na->virt_hdr_len;
1212262153Sluigi			len -= na->virt_hdr_len;
1213262153Sluigi		}
1214262153Sluigi		dst_port = b->nm_bdg_lookup(buf, len, &dst_ring, na);
1215262153Sluigi		if (netmap_verbose > 255)
1216262153Sluigi			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1217262153Sluigi		if (dst_port == NM_BDG_NOPORT)
1218262153Sluigi			continue; /* this packet is identified to be dropped */
1219262153Sluigi		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1220262153Sluigi			continue;
1221262153Sluigi		else if (dst_port == NM_BDG_BROADCAST)
1222262153Sluigi			dst_ring = 0; /* broadcasts always go to ring 0 */
1223262153Sluigi		else if (unlikely(dst_port == me ||
1224262153Sluigi		    !b->bdg_ports[dst_port]))
1225262153Sluigi			continue;
1226262153Sluigi
1227262153Sluigi		/* get a position in the scratch pad */
1228262153Sluigi		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1229262153Sluigi		d = dst_ents + d_i;
1230262153Sluigi
1231262153Sluigi		/* append the first fragment to the list */
1232262153Sluigi		if (d->bq_head == NM_FT_NULL) { /* new destination */
1233262153Sluigi			d->bq_head = d->bq_tail = i;
1234262153Sluigi			/* remember this position to be scanned later */
1235262153Sluigi			if (dst_port != NM_BDG_BROADCAST)
1236262153Sluigi				dsts[num_dsts++] = d_i;
1237262153Sluigi		} else {
1238262153Sluigi			ft[d->bq_tail].ft_next = i;
1239262153Sluigi			d->bq_tail = i;
1240262153Sluigi		}
1241262153Sluigi		d->bq_len += ft[i].ft_frags;
1242262153Sluigi	}
1243262153Sluigi
1244262153Sluigi	/*
1245262153Sluigi	 * Broadcast traffic goes to ring 0 on all destinations.
1246262153Sluigi	 * So we need to add these rings to the list of ports to scan.
1247262153Sluigi	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1248262153Sluigi	 * expensive. We should keep a compact list of active destinations
1249262153Sluigi	 * so we could shorten this loop.
1250262153Sluigi	 */
1251262153Sluigi	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1252262153Sluigi	if (brddst->bq_head != NM_FT_NULL) {
1253262153Sluigi		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1254262153Sluigi			uint16_t d_i;
1255262153Sluigi			i = b->bdg_port_index[j];
1256262153Sluigi			if (unlikely(i == me))
1257262153Sluigi				continue;
1258262153Sluigi			d_i = i * NM_BDG_MAXRINGS;
1259262153Sluigi			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1260262153Sluigi				dsts[num_dsts++] = d_i;
1261262153Sluigi		}
1262262153Sluigi	}
1263262153Sluigi
1264262153Sluigi	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1265262153Sluigi	/* second pass: scan destinations (XXX will be modular somehow) */
1266262153Sluigi	for (i = 0; i < num_dsts; i++) {
1267262153Sluigi		struct ifnet *dst_ifp;
1268262153Sluigi		struct netmap_vp_adapter *dst_na;
1269262153Sluigi		struct netmap_kring *kring;
1270262153Sluigi		struct netmap_ring *ring;
1271262153Sluigi		u_int dst_nr, lim, j, d_i, next, brd_next;
1272262153Sluigi		u_int needed, howmany;
1273262153Sluigi		int retry = netmap_txsync_retry;
1274262153Sluigi		struct nm_bdg_q *d;
1275262153Sluigi		uint32_t my_start = 0, lease_idx = 0;
1276262153Sluigi		int nrings;
1277262153Sluigi		int virt_hdr_mismatch = 0;
1278262153Sluigi
1279262153Sluigi		d_i = dsts[i];
1280262153Sluigi		ND("second pass %d port %d", i, d_i);
1281262153Sluigi		d = dst_ents + d_i;
1282262153Sluigi		// XXX fix the division
1283262153Sluigi		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1284262153Sluigi		/* protect from the lookup function returning an inactive
1285262153Sluigi		 * destination port
1286262153Sluigi		 */
1287262153Sluigi		if (unlikely(dst_na == NULL))
1288262153Sluigi			goto cleanup;
1289262153Sluigi		if (dst_na->up.na_flags & NAF_SW_ONLY)
1290262153Sluigi			goto cleanup;
1291262153Sluigi		dst_ifp = dst_na->up.ifp;
1292262153Sluigi		/*
1293262153Sluigi		 * The interface may be in !netmap mode in two cases:
1294262153Sluigi		 * - when na is attached but not activated yet;
1295262153Sluigi		 * - when na is being deactivated but is still attached.
1296262153Sluigi		 */
1297262153Sluigi		if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
1298262153Sluigi			ND("not in netmap mode!");
1299262153Sluigi			goto cleanup;
1300262153Sluigi		}
1301262153Sluigi
1302262153Sluigi		/* there is at least one either unicast or broadcast packet */
1303262153Sluigi		brd_next = brddst->bq_head;
1304262153Sluigi		next = d->bq_head;
1305262153Sluigi		/* we need to reserve this many slots. If fewer are
1306262153Sluigi		 * available, some packets will be dropped.
1307262153Sluigi		 * Packets may have multiple fragments, so we may not use
1308262153Sluigi		 * there is a chance that we may not use all of the slots
1309262153Sluigi		 * we have claimed, so we will need to handle the leftover
1310262153Sluigi		 * ones when we regain the lock.
1311262153Sluigi		 */
1312262153Sluigi		needed = d->bq_len + brddst->bq_len;
1313262153Sluigi
1314262153Sluigi		if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
1315262153Sluigi			/* There is a virtio-net header/offloadings mismatch between
1316262153Sluigi			 * source and destination. The slower mismatch datapath will
1317262153Sluigi			 * be used to cope with all the mismatches.
1318262153Sluigi			 */
1319262153Sluigi			virt_hdr_mismatch = 1;
1320262153Sluigi			if (dst_na->mfs < na->mfs) {
1321262153Sluigi				/* We may need to do segmentation offloadings, and so
1322262153Sluigi				 * we may need a number of destination slots greater
1323262153Sluigi				 * than the number of input slots ('needed').
1324262153Sluigi				 * We look for the smallest integer 'x' which satisfies:
1325262153Sluigi				 *	needed * na->mfs + x * H <= x * na->mfs
1326262153Sluigi				 * where 'H' is the length of the longest header that may
1327262153Sluigi				 * be replicated in the segmentation process (e.g. for
1328262153Sluigi				 * TCPv4 we must account for ethernet header, IP header
1329262153Sluigi				 * and TCPv4 header).
1330262153Sluigi				 */
1331262153Sluigi				needed = (needed * na->mfs) /
1332262153Sluigi						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
1333262153Sluigi				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
1334262153Sluigi			}
1335262153Sluigi		}
1336262153Sluigi
1337262153Sluigi		ND(5, "pass 2 dst %d is %x %s",
1338262153Sluigi			i, d_i, is_vp ? "virtual" : "nic/host");
1339262153Sluigi		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1340262153Sluigi		nrings = dst_na->up.num_rx_rings;
1341262153Sluigi		if (dst_nr >= nrings)
1342262153Sluigi			dst_nr = dst_nr % nrings;
1343262153Sluigi		kring = &dst_na->up.rx_rings[dst_nr];
1344262153Sluigi		ring = kring->ring;
1345262153Sluigi		lim = kring->nkr_num_slots - 1;
1346262153Sluigi
1347262153Sluigiretry:
1348262153Sluigi
1349262153Sluigi		if (dst_na->retry && retry) {
1350262153Sluigi			/* try to get some free slot from the previous run */
1351262153Sluigi			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1352262153Sluigi		}
1353262153Sluigi		/* reserve the buffers in the queue and an entry
1354262153Sluigi		 * to report completion, and drop lock.
1355262153Sluigi		 * XXX this might become a helper function.
1356262153Sluigi		 */
1357262153Sluigi		mtx_lock(&kring->q_lock);
1358262153Sluigi		if (kring->nkr_stopped) {
1359262153Sluigi			mtx_unlock(&kring->q_lock);
1360262153Sluigi			goto cleanup;
1361262153Sluigi		}
1362262153Sluigi		my_start = j = kring->nkr_hwlease;
1363262153Sluigi		howmany = nm_kr_space(kring, 1);
1364262153Sluigi		if (needed < howmany)
1365262153Sluigi			howmany = needed;
1366262153Sluigi		lease_idx = nm_kr_lease(kring, howmany, 1);
1367262153Sluigi		mtx_unlock(&kring->q_lock);
1368262153Sluigi
1369262153Sluigi		/* only retry if we need more than available slots */
1370262153Sluigi		if (retry && needed <= howmany)
1371262153Sluigi			retry = 0;
1372262153Sluigi
1373262153Sluigi		/* copy to the destination queue */
1374262153Sluigi		while (howmany > 0) {
1375262153Sluigi			struct netmap_slot *slot;
1376262153Sluigi			struct nm_bdg_fwd *ft_p, *ft_end;
1377262153Sluigi			u_int cnt;
1378262153Sluigi
1379262153Sluigi			/* find the queue from which we pick next packet.
1380262153Sluigi			 * NM_FT_NULL is always higher than valid indexes
1381262153Sluigi			 * so we never dereference it if the other list
1382262153Sluigi			 * has packets (and if both are empty we never
1383262153Sluigi			 * get here).
1384262153Sluigi			 */
1385262153Sluigi			if (next < brd_next) {
1386262153Sluigi				ft_p = ft + next;
1387262153Sluigi				next = ft_p->ft_next;
1388262153Sluigi			} else { /* insert broadcast */
1389262153Sluigi				ft_p = ft + brd_next;
1390262153Sluigi				brd_next = ft_p->ft_next;
1391262153Sluigi			}
1392262153Sluigi			cnt = ft_p->ft_frags; // cnt > 0
1393262153Sluigi			if (unlikely(cnt > howmany))
1394262153Sluigi			    break; /* no more space */
1395262153Sluigi			if (netmap_verbose && cnt > 1)
1396262153Sluigi				RD(5, "rx %d frags to %d", cnt, j);
1397262153Sluigi			ft_end = ft_p + cnt;
1398262153Sluigi			if (unlikely(virt_hdr_mismatch)) {
1399262153Sluigi				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
1400262153Sluigi			} else {
1401262153Sluigi				howmany -= cnt;
1402262153Sluigi				do {
1403262153Sluigi					char *dst, *src = ft_p->ft_buf;
1404262153Sluigi					size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1405262153Sluigi
1406262153Sluigi					slot = &ring->slot[j];
1407262153Sluigi					dst = BDG_NMB(&dst_na->up, slot);
1408262153Sluigi
1409262153Sluigi					ND("send [%d] %d(%d) bytes at %s:%d",
1410262153Sluigi							i, (int)copy_len, (int)dst_len,
1411262153Sluigi							NM_IFPNAME(dst_ifp), j);
1412262153Sluigi					/* round to a multiple of 64 */
1413262153Sluigi					copy_len = (copy_len + 63) & ~63;
1414262153Sluigi
1415262153Sluigi					if (ft_p->ft_flags & NS_INDIRECT) {
1416262153Sluigi						if (copyin(src, dst, copy_len)) {
1417262153Sluigi							// invalid user pointer, pretend len is 0
1418262153Sluigi							dst_len = 0;
1419262153Sluigi						}
1420262153Sluigi					} else {
1421262153Sluigi						//memcpy(dst, src, copy_len);
1422262153Sluigi						pkt_copy(src, dst, (int)copy_len);
1423262153Sluigi					}
1424262153Sluigi					slot->len = dst_len;
1425262153Sluigi					slot->flags = (cnt << 8)| NS_MOREFRAG;
1426262153Sluigi					j = nm_next(j, lim);
1427262153Sluigi					needed--;
1428262153Sluigi					ft_p++;
1429262153Sluigi				} while (ft_p != ft_end);
1430262153Sluigi				slot->flags = (cnt << 8); /* clear flag on last entry */
1431262153Sluigi			}
1432262153Sluigi			/* are we done ? */
1433262153Sluigi			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1434262153Sluigi				break;
1435262153Sluigi		}
1436262153Sluigi		{
1437262153Sluigi		    /* current position */
1438262153Sluigi		    uint32_t *p = kring->nkr_leases; /* shorthand */
1439262153Sluigi		    uint32_t update_pos;
1440262153Sluigi		    int still_locked = 1;
1441262153Sluigi
1442262153Sluigi		    mtx_lock(&kring->q_lock);
1443262153Sluigi		    if (unlikely(howmany > 0)) {
1444262153Sluigi			/* not used all bufs. If i am the last one
1445262153Sluigi			 * i can recover the slots, otherwise must
1446262153Sluigi			 * fill them with 0 to mark empty packets.
1447262153Sluigi			 */
1448262153Sluigi			ND("leftover %d bufs", howmany);
1449262153Sluigi			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1450262153Sluigi			    /* yes i am the last one */
1451262153Sluigi			    ND("roll back nkr_hwlease to %d", j);
1452262153Sluigi			    kring->nkr_hwlease = j;
1453262153Sluigi			} else {
1454262153Sluigi			    while (howmany-- > 0) {
1455262153Sluigi				ring->slot[j].len = 0;
1456262153Sluigi				ring->slot[j].flags = 0;
1457262153Sluigi				j = nm_next(j, lim);
1458262153Sluigi			    }
1459262153Sluigi			}
1460262153Sluigi		    }
1461262153Sluigi		    p[lease_idx] = j; /* report I am done */
1462262153Sluigi
1463262153Sluigi		    update_pos = kring->nr_hwtail;
1464262153Sluigi
1465262153Sluigi		    if (my_start == update_pos) {
1466262153Sluigi			/* all slots before my_start have been reported,
1467262153Sluigi			 * so scan subsequent leases to see if other ranges
1468262153Sluigi			 * have been completed, and to a selwakeup or txsync.
1469262153Sluigi		         */
1470262153Sluigi			while (lease_idx != kring->nkr_lease_idx &&
1471262153Sluigi				p[lease_idx] != NR_NOSLOT) {
1472262153Sluigi			    j = p[lease_idx];
1473262153Sluigi			    p[lease_idx] = NR_NOSLOT;
1474262153Sluigi			    lease_idx = nm_next(lease_idx, lim);
1475262153Sluigi			}
1476262153Sluigi			/* j is the new 'write' position. j != my_start
1477262153Sluigi			 * means there are new buffers to report
1478262153Sluigi			 */
1479262153Sluigi			if (likely(j != my_start)) {
1480262153Sluigi				kring->nr_hwtail = j;
1481262153Sluigi				still_locked = 0;
1482262153Sluigi				mtx_unlock(&kring->q_lock);
1483262153Sluigi				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1484262153Sluigi				if (dst_na->retry && retry--)
1485262153Sluigi					goto retry;
1486262153Sluigi			}
1487262153Sluigi		    }
1488262153Sluigi		    if (still_locked)
1489262153Sluigi			mtx_unlock(&kring->q_lock);
1490262153Sluigi		}
1491262153Sluigicleanup:
1492262153Sluigi		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1493262153Sluigi		d->bq_len = 0;
1494262153Sluigi	}
1495262153Sluigi	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1496262153Sluigi	brddst->bq_len = 0;
1497262153Sluigi	return 0;
1498262153Sluigi}
1499262153Sluigi
1500262153Sluigi
1501262153Sluigistatic int
1502262153Sluiginetmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
1503262153Sluigi{
1504262153Sluigi	struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
1505262153Sluigi	u_int done;
1506262153Sluigi	u_int const lim = kring->nkr_num_slots - 1;
1507262153Sluigi	u_int const cur = kring->rcur;
1508262153Sluigi
1509262153Sluigi	if (bridge_batch <= 0) { /* testing only */
1510262153Sluigi		done = cur; // used all
1511262153Sluigi		goto done;
1512262153Sluigi	}
1513262153Sluigi	if (bridge_batch > NM_BDG_BATCH)
1514262153Sluigi		bridge_batch = NM_BDG_BATCH;
1515262153Sluigi
1516262153Sluigi	done = nm_bdg_preflush(na, ring_nr, kring, cur);
1517262153Sluigidone:
1518262153Sluigi	if (done != cur)
1519262153Sluigi		D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail);
1520262153Sluigi	/*
1521262153Sluigi	 * packets between 'done' and 'cur' are left unsent.
1522262153Sluigi	 */
1523262153Sluigi	kring->nr_hwcur = done;
1524262153Sluigi	kring->nr_hwtail = nm_prev(done, lim);
1525262153Sluigi	nm_txsync_finalize(kring);
1526262153Sluigi	if (netmap_verbose)
1527262153Sluigi		D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
1528262153Sluigi	return 0;
1529262153Sluigi}
1530262153Sluigi
1531262153Sluigi
1532262153Sluigi/*
1533262153Sluigi * main dispatch routine for the bridge.
1534262153Sluigi * We already know that only one thread is running this.
1535262153Sluigi * we must run nm_bdg_preflush without lock.
1536262153Sluigi */
1537262153Sluigistatic int
1538262153Sluigibdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1539262153Sluigi{
1540262153Sluigi	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
1541262153Sluigi	return netmap_vp_txsync(vpna, ring_nr, flags);
1542262153Sluigi}
1543262153Sluigi
1544262153Sluigistatic int
1545262153Sluiginetmap_vp_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1546262153Sluigi{
1547262153Sluigi	struct netmap_kring *kring = &na->rx_rings[ring_nr];
1548262153Sluigi	struct netmap_ring *ring = kring->ring;
1549262153Sluigi	u_int nm_i, lim = kring->nkr_num_slots - 1;
1550262153Sluigi	u_int head = nm_rxsync_prologue(kring);
1551262153Sluigi	int n;
1552262153Sluigi
1553262153Sluigi	if (head > lim) {
1554262153Sluigi		D("ouch dangerous reset!!!");
1555262153Sluigi		n = netmap_ring_reinit(kring);
1556262153Sluigi		goto done;
1557262153Sluigi	}
1558262153Sluigi
1559262153Sluigi	/* First part, import newly received packets. */
1560262153Sluigi	/* actually nothing to do here, they are already in the kring */
1561262153Sluigi
1562262153Sluigi	/* Second part, skip past packets that userspace has released. */
1563262153Sluigi	nm_i = kring->nr_hwcur;
1564262153Sluigi	if (nm_i != head) {
1565262153Sluigi		/* consistency check, but nothing really important here */
1566262153Sluigi		for (n = 0; likely(nm_i != head); n++) {
1567262153Sluigi			struct netmap_slot *slot = &ring->slot[nm_i];
1568262153Sluigi			void *addr = BDG_NMB(na, slot);
1569262153Sluigi
1570262153Sluigi			if (addr == netmap_buffer_base) { /* bad buf */
1571262153Sluigi				D("bad buffer index %d, ignore ?",
1572262153Sluigi					slot->buf_idx);
1573262153Sluigi			}
1574262153Sluigi			slot->flags &= ~NS_BUF_CHANGED;
1575262153Sluigi			nm_i = nm_next(nm_i, lim);
1576262153Sluigi		}
1577262153Sluigi		kring->nr_hwcur = head;
1578262153Sluigi	}
1579262153Sluigi
1580262153Sluigi	/* tell userspace that there are new packets */
1581262153Sluigi	nm_rxsync_finalize(kring);
1582262153Sluigi	n = 0;
1583262153Sluigidone:
1584262153Sluigi	return n;
1585262153Sluigi}
1586262153Sluigi
1587262153Sluigi/*
1588262153Sluigi * user process reading from a VALE switch.
1589262153Sluigi * Already protected against concurrent calls from userspace,
1590262153Sluigi * but we must acquire the queue's lock to protect against
1591262153Sluigi * writers on the same queue.
1592262153Sluigi */
1593262153Sluigistatic int
1594262153Sluigibdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1595262153Sluigi{
1596262153Sluigi	struct netmap_kring *kring = &na->rx_rings[ring_nr];
1597262153Sluigi	int n;
1598262153Sluigi
1599262153Sluigi	mtx_lock(&kring->q_lock);
1600262153Sluigi	n = netmap_vp_rxsync(na, ring_nr, flags);
1601262153Sluigi	mtx_unlock(&kring->q_lock);
1602262153Sluigi	return n;
1603262153Sluigi}
1604262153Sluigi
1605262153Sluigi
1606262153Sluigistatic int
1607262153Sluigibdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp)
1608262153Sluigi{
1609262153Sluigi	struct netmap_vp_adapter *vpna;
1610262153Sluigi	struct netmap_adapter *na;
1611262153Sluigi	int error;
1612262153Sluigi	u_int npipes = 0;
1613262153Sluigi
1614262153Sluigi	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1615262153Sluigi	if (vpna == NULL)
1616262153Sluigi		return ENOMEM;
1617262153Sluigi
1618262153Sluigi 	na = &vpna->up;
1619262153Sluigi
1620262153Sluigi	na->ifp = ifp;
1621262153Sluigi
1622262153Sluigi	/* bound checking */
1623262153Sluigi	na->num_tx_rings = nmr->nr_tx_rings;
1624262153Sluigi	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1625262153Sluigi	nmr->nr_tx_rings = na->num_tx_rings; // write back
1626262153Sluigi	na->num_rx_rings = nmr->nr_rx_rings;
1627262153Sluigi	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1628262153Sluigi	nmr->nr_rx_rings = na->num_rx_rings; // write back
1629262153Sluigi	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1630262153Sluigi			1, NM_BDG_MAXSLOTS, NULL);
1631262153Sluigi	na->num_tx_desc = nmr->nr_tx_slots;
1632262153Sluigi	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1633262153Sluigi			1, NM_BDG_MAXSLOTS, NULL);
1634262153Sluigi	/* validate number of pipes. We want at least 1,
1635262153Sluigi	 * but probably can do with some more.
1636262153Sluigi	 * So let's use 2 as default (when 0 is supplied)
1637262153Sluigi	 */
1638262153Sluigi	npipes = nmr->nr_arg1;
1639262153Sluigi	nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
1640262153Sluigi	nmr->nr_arg1 = npipes;	/* write back */
1641262153Sluigi	/* validate extra bufs */
1642262153Sluigi	nm_bound_var(&nmr->nr_arg3, 0, 0,
1643262153Sluigi			128*NM_BDG_MAXSLOTS, NULL);
1644262153Sluigi	na->num_rx_desc = nmr->nr_rx_slots;
1645262153Sluigi	vpna->virt_hdr_len = 0;
1646262153Sluigi	vpna->mfs = 1514;
1647262153Sluigi	/*if (vpna->mfs > netmap_buf_size)  TODO netmap_buf_size is zero??
1648262153Sluigi		vpna->mfs = netmap_buf_size; */
1649262153Sluigi        if (netmap_verbose)
1650262153Sluigi		D("max frame size %u", vpna->mfs);
1651262153Sluigi
1652262153Sluigi	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
1653262153Sluigi	na->nm_txsync = bdg_netmap_txsync;
1654262153Sluigi	na->nm_rxsync = bdg_netmap_rxsync;
1655262153Sluigi	na->nm_register = bdg_netmap_reg;
1656262153Sluigi	na->nm_dtor = netmap_adapter_vp_dtor;
1657262153Sluigi	na->nm_krings_create = netmap_vp_krings_create;
1658262153Sluigi	na->nm_krings_delete = netmap_vp_krings_delete;
1659262153Sluigi	na->nm_mem = netmap_mem_private_new(NM_IFPNAME(na->ifp),
1660262153Sluigi			na->num_tx_rings, na->num_tx_desc,
1661262153Sluigi			na->num_rx_rings, na->num_rx_desc,
1662262153Sluigi			nmr->nr_arg3, npipes, &error);
1663262153Sluigi	if (na->nm_mem == NULL)
1664262153Sluigi		goto err;
1665262153Sluigi	/* other nmd fields are set in the common routine */
1666262153Sluigi	error = netmap_attach_common(na);
1667262153Sluigi	if (error)
1668262153Sluigi		goto err;
1669262153Sluigi	return 0;
1670262153Sluigi
1671262153Sluigierr:
1672262153Sluigi	if (na->nm_mem != NULL)
1673262153Sluigi		netmap_mem_private_delete(na->nm_mem);
1674262153Sluigi	free(vpna, M_DEVBUF);
1675262153Sluigi	return error;
1676262153Sluigi}
1677262153Sluigi
1678262153Sluigi
1679262153Sluigistatic void
1680262153Sluiginetmap_bwrap_dtor(struct netmap_adapter *na)
1681262153Sluigi{
1682262153Sluigi	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1683262153Sluigi	struct netmap_adapter *hwna = bna->hwna;
1684262153Sluigi	struct nm_bridge *b = bna->up.na_bdg,
1685262153Sluigi		*bh = bna->host.na_bdg;
1686262153Sluigi	struct ifnet *ifp = na->ifp;
1687262153Sluigi
1688262153Sluigi	ND("na %p", na);
1689262153Sluigi
1690262153Sluigi	if (b) {
1691262153Sluigi		netmap_bdg_detach_common(b, bna->up.bdg_port,
1692262153Sluigi			(bh ? bna->host.bdg_port : -1));
1693262153Sluigi	}
1694262153Sluigi
1695262153Sluigi	hwna->na_private = NULL;
1696262153Sluigi	netmap_adapter_put(hwna);
1697262153Sluigi
1698262153Sluigi	bzero(ifp, sizeof(*ifp));
1699262153Sluigi	free(ifp, M_DEVBUF);
1700262153Sluigi	na->ifp = NULL;
1701262153Sluigi
1702262153Sluigi}
1703262153Sluigi
1704262153Sluigi
1705262153Sluigi/*
1706262153Sluigi * Intr callback for NICs connected to a bridge.
1707262153Sluigi * Simply ignore tx interrupts (maybe we could try to recover space ?)
1708262153Sluigi * and pass received packets from nic to the bridge.
1709262153Sluigi *
1710262153Sluigi * XXX TODO check locking: this is called from the interrupt
1711262153Sluigi * handler so we should make sure that the interface is not
1712262153Sluigi * disconnected while passing down an interrupt.
1713262153Sluigi *
1714262153Sluigi * Note, no user process can access this NIC or the host stack.
1715262153Sluigi * The only part of the ring that is significant are the slots,
1716262153Sluigi * and head/cur/tail are set from the kring as needed
1717262153Sluigi * (part as a receive ring, part as a transmit ring).
1718262153Sluigi *
1719262153Sluigi * callback that overwrites the hwna notify callback.
1720262153Sluigi * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1721262153Sluigi * The bridge wrapper then sends the packets through the bridge.
1722262153Sluigi */
1723262153Sluigistatic int
1724262153Sluiginetmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
1725262153Sluigi{
1726262153Sluigi	struct ifnet *ifp = na->ifp;
1727262153Sluigi	struct netmap_bwrap_adapter *bna = na->na_private;
1728262153Sluigi	struct netmap_vp_adapter *hostna = &bna->host;
1729262153Sluigi	struct netmap_kring *kring, *bkring;
1730262153Sluigi	struct netmap_ring *ring;
1731262153Sluigi	int is_host_ring = ring_nr == na->num_rx_rings;
1732262153Sluigi	struct netmap_vp_adapter *vpna = &bna->up;
1733262153Sluigi	int error = 0;
1734262153Sluigi
1735262153Sluigi	if (netmap_verbose)
1736262153Sluigi	    D("%s %s%d 0x%x", NM_IFPNAME(ifp),
1737262153Sluigi		(tx == NR_TX ? "TX" : "RX"), ring_nr, flags);
1738262153Sluigi
1739262153Sluigi	if (flags & NAF_DISABLE_NOTIFY) {
1740262153Sluigi		kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
1741262153Sluigi		bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
1742262153Sluigi		if (kring[ring_nr].nkr_stopped)
1743262153Sluigi			netmap_disable_ring(&bkring[ring_nr]);
1744262153Sluigi		else
1745262153Sluigi			bkring[ring_nr].nkr_stopped = 0;
1746262153Sluigi		return 0;
1747262153Sluigi	}
1748262153Sluigi
1749262153Sluigi	if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
1750262153Sluigi		return 0;
1751262153Sluigi
1752262153Sluigi	/* we only care about receive interrupts */
1753262153Sluigi	if (tx == NR_TX)
1754262153Sluigi		return 0;
1755262153Sluigi
1756262153Sluigi	kring = &na->rx_rings[ring_nr];
1757262153Sluigi	ring = kring->ring;
1758262153Sluigi
1759262153Sluigi	/* make sure the ring is not disabled */
1760262153Sluigi	if (nm_kr_tryget(kring))
1761262153Sluigi		return 0;
1762262153Sluigi
1763262153Sluigi	if (is_host_ring && hostna->na_bdg == NULL) {
1764262153Sluigi		error = bna->save_notify(na, ring_nr, tx, flags);
1765262153Sluigi		goto put_out;
1766262153Sluigi	}
1767262153Sluigi
1768262153Sluigi	/* Here we expect ring->head = ring->cur = ring->tail
1769262153Sluigi	 * because everything has been released from the previous round.
1770262153Sluigi	 * However the ring is shared and we might have info from
1771262153Sluigi	 * the wrong side (the tx ring). Hence we overwrite with
1772262153Sluigi	 * the info from the rx kring.
1773262153Sluigi	 */
1774262153Sluigi	if (netmap_verbose)
1775262153Sluigi	    D("%s head %d cur %d tail %d (kring %d %d %d)",  NM_IFPNAME(ifp),
1776262153Sluigi		ring->head, ring->cur, ring->tail,
1777262153Sluigi		kring->rhead, kring->rcur, kring->rtail);
1778262153Sluigi
1779262153Sluigi	ring->head = kring->rhead;
1780262153Sluigi	ring->cur = kring->rcur;
1781262153Sluigi	ring->tail = kring->rtail;
1782262153Sluigi
1783262153Sluigi	if (is_host_ring) {
1784262153Sluigi		vpna = hostna;
1785262153Sluigi		ring_nr = 0;
1786262153Sluigi	}
1787262153Sluigi	/* simulate a user wakeup on the rx ring */
1788262153Sluigi	/* fetch packets that have arrived.
1789262153Sluigi	 * XXX maybe do this in a loop ?
1790262153Sluigi	 */
1791262153Sluigi	error = kring->nm_sync(kring, 0);
1792262153Sluigi	if (error)
1793262153Sluigi		goto put_out;
1794262153Sluigi	if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
1795262153Sluigi		D("how strange, interrupt with no packets on %s",
1796262153Sluigi			NM_IFPNAME(ifp));
1797262153Sluigi		goto put_out;
1798262153Sluigi	}
1799262153Sluigi
1800262153Sluigi	/* new packets are ring->cur to ring->tail, and the bkring
1801262153Sluigi	 * had hwcur == ring->cur. So advance ring->cur to ring->tail
1802262153Sluigi	 * to push all packets out.
1803262153Sluigi	 */
1804262153Sluigi	ring->head = ring->cur = ring->tail;
1805262153Sluigi
1806262153Sluigi	/* also set tail to what the bwrap expects */
1807262153Sluigi	bkring = &vpna->up.tx_rings[ring_nr];
1808262153Sluigi	ring->tail = bkring->nr_hwtail; // rtail too ?
1809262153Sluigi
1810262153Sluigi	/* pass packets to the switch */
1811262153Sluigi	nm_txsync_prologue(bkring); // XXX error checking ?
1812262153Sluigi	netmap_vp_txsync(vpna, ring_nr, flags);
1813262153Sluigi
1814262153Sluigi	/* mark all buffers as released on this ring */
1815262153Sluigi	ring->head = ring->cur = kring->nr_hwtail;
1816262153Sluigi	ring->tail = kring->rtail;
1817262153Sluigi	/* another call to actually release the buffers */
1818262153Sluigi	if (!is_host_ring) {
1819262153Sluigi		error = kring->nm_sync(kring, 0);
1820262153Sluigi	} else {
1821262153Sluigi		/* mark all packets as released, as in the
1822262153Sluigi		 * second part of netmap_rxsync_from_host()
1823262153Sluigi		 */
1824262153Sluigi		kring->nr_hwcur = kring->nr_hwtail;
1825262153Sluigi		nm_rxsync_finalize(kring);
1826262153Sluigi	}
1827262153Sluigi
1828262153Sluigiput_out:
1829262153Sluigi	nm_kr_put(kring);
1830262153Sluigi	return error;
1831262153Sluigi}
1832262153Sluigi
1833262153Sluigi
1834262153Sluigistatic int
1835262153Sluiginetmap_bwrap_register(struct netmap_adapter *na, int onoff)
1836262153Sluigi{
1837262153Sluigi	struct netmap_bwrap_adapter *bna =
1838262153Sluigi		(struct netmap_bwrap_adapter *)na;
1839262153Sluigi	struct netmap_adapter *hwna = bna->hwna;
1840262153Sluigi	struct netmap_vp_adapter *hostna = &bna->host;
1841262153Sluigi	int error;
1842262153Sluigi
1843262153Sluigi	ND("%s %s", NM_IFPNAME(na->ifp), onoff ? "on" : "off");
1844262153Sluigi
1845262153Sluigi	if (onoff) {
1846262153Sluigi		int i;
1847262153Sluigi
1848262153Sluigi		hwna->na_lut = na->na_lut;
1849262153Sluigi		hwna->na_lut_objtotal = na->na_lut_objtotal;
1850262153Sluigi
1851262153Sluigi		if (hostna->na_bdg) {
1852262153Sluigi			hostna->up.na_lut = na->na_lut;
1853262153Sluigi			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
1854262153Sluigi		}
1855262153Sluigi
1856262153Sluigi		/* cross-link the netmap rings
1857262153Sluigi		 * The original number of rings comes from hwna,
1858262153Sluigi		 * rx rings on one side equals tx rings on the other.
1859262153Sluigi		 */
1860262153Sluigi		for (i = 0; i < na->num_rx_rings + 1; i++) {
1861262153Sluigi			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
1862262153Sluigi			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
1863262153Sluigi		}
1864262153Sluigi		for (i = 0; i < na->num_tx_rings + 1; i++) {
1865262153Sluigi			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
1866262153Sluigi			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
1867262153Sluigi		}
1868262153Sluigi	}
1869262153Sluigi
1870262153Sluigi	if (hwna->ifp) {
1871262153Sluigi		error = hwna->nm_register(hwna, onoff);
1872262153Sluigi		if (error)
1873262153Sluigi			return error;
1874262153Sluigi	}
1875262153Sluigi
1876262153Sluigi	bdg_netmap_reg(na, onoff);
1877262153Sluigi
1878262153Sluigi	if (onoff) {
1879262153Sluigi		bna->save_notify = hwna->nm_notify;
1880262153Sluigi		hwna->nm_notify = netmap_bwrap_intr_notify;
1881262153Sluigi	} else {
1882262153Sluigi		hwna->nm_notify = bna->save_notify;
1883262153Sluigi		hwna->na_lut = NULL;
1884262153Sluigi		hwna->na_lut_objtotal = 0;
1885262153Sluigi	}
1886262153Sluigi
1887262153Sluigi	return 0;
1888262153Sluigi}
1889262153Sluigi
1890262153Sluigi
1891262153Sluigistatic int
1892262153Sluiginetmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
1893262153Sluigi				    u_int *rxr, u_int *rxd)
1894262153Sluigi{
1895262153Sluigi	struct netmap_bwrap_adapter *bna =
1896262153Sluigi		(struct netmap_bwrap_adapter *)na;
1897262153Sluigi	struct netmap_adapter *hwna = bna->hwna;
1898262153Sluigi
1899262153Sluigi	/* forward the request */
1900262153Sluigi	netmap_update_config(hwna);
1901262153Sluigi	/* swap the results */
1902262153Sluigi	*txr = hwna->num_rx_rings;
1903262153Sluigi	*txd = hwna->num_rx_desc;
1904262153Sluigi	*rxr = hwna->num_tx_rings;
1905262153Sluigi	*rxd = hwna->num_rx_desc;
1906262153Sluigi
1907262153Sluigi	return 0;
1908262153Sluigi}
1909262153Sluigi
1910262153Sluigi
1911262153Sluigistatic int
1912262153Sluiginetmap_bwrap_krings_create(struct netmap_adapter *na)
1913262153Sluigi{
1914262153Sluigi	struct netmap_bwrap_adapter *bna =
1915262153Sluigi		(struct netmap_bwrap_adapter *)na;
1916262153Sluigi	struct netmap_adapter *hwna = bna->hwna;
1917262153Sluigi	struct netmap_adapter *hostna = &bna->host.up;
1918262153Sluigi	int error;
1919262153Sluigi
1920262153Sluigi	ND("%s", NM_IFPNAME(na->ifp));
1921262153Sluigi
1922262153Sluigi	error = netmap_vp_krings_create(na);
1923262153Sluigi	if (error)
1924262153Sluigi		return error;
1925262153Sluigi
1926262153Sluigi	error = hwna->nm_krings_create(hwna);
1927262153Sluigi	if (error) {
1928262153Sluigi		netmap_vp_krings_delete(na);
1929262153Sluigi		return error;
1930262153Sluigi	}
1931262153Sluigi
1932262153Sluigi	if (na->na_flags & NAF_HOST_RINGS) {
1933262153Sluigi		hostna->tx_rings = na->tx_rings + na->num_tx_rings;
1934262153Sluigi		hostna->rx_rings = na->rx_rings + na->num_rx_rings;
1935262153Sluigi	}
1936262153Sluigi
1937262153Sluigi	return 0;
1938262153Sluigi}
1939262153Sluigi
1940262153Sluigi
1941262153Sluigistatic void
1942262153Sluiginetmap_bwrap_krings_delete(struct netmap_adapter *na)
1943262153Sluigi{
1944262153Sluigi	struct netmap_bwrap_adapter *bna =
1945262153Sluigi		(struct netmap_bwrap_adapter *)na;
1946262153Sluigi	struct netmap_adapter *hwna = bna->hwna;
1947262153Sluigi
1948262153Sluigi	ND("%s", NM_IFPNAME(na->ifp));
1949262153Sluigi
1950262153Sluigi	hwna->nm_krings_delete(hwna);
1951262153Sluigi	netmap_vp_krings_delete(na);
1952262153Sluigi}
1953262153Sluigi
1954262153Sluigi
1955262153Sluigi/* notify method for the bridge-->hwna direction */
1956262153Sluigistatic int
1957262153Sluiginetmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1958262153Sluigi{
1959262153Sluigi	struct netmap_bwrap_adapter *bna =
1960262153Sluigi		(struct netmap_bwrap_adapter *)na;
1961262153Sluigi	struct netmap_adapter *hwna = bna->hwna;
1962262153Sluigi	struct netmap_kring *kring, *hw_kring;
1963262153Sluigi	struct netmap_ring *ring;
1964262153Sluigi	u_int lim;
1965262153Sluigi	int error = 0;
1966262153Sluigi
1967262153Sluigi	if (tx == NR_TX)
1968262153Sluigi	        return EINVAL;
1969262153Sluigi
1970262153Sluigi	kring = &na->rx_rings[ring_n];
1971262153Sluigi	hw_kring = &hwna->tx_rings[ring_n];
1972262153Sluigi	ring = kring->ring;
1973262153Sluigi	lim = kring->nkr_num_slots - 1;
1974262153Sluigi
1975262153Sluigi	if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
1976262153Sluigi		return 0;
1977262153Sluigi	mtx_lock(&kring->q_lock);
1978262153Sluigi	/* first step: simulate a user wakeup on the rx ring */
1979262153Sluigi	netmap_vp_rxsync(na, ring_n, flags);
1980262153Sluigi	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
1981262153Sluigi		NM_IFPNAME(na->ifp), ring_n,
1982262153Sluigi		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
1983262153Sluigi		ring->head, ring->cur, ring->tail,
1984262153Sluigi		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
1985262153Sluigi	/* second step: the simulated user consumes all new packets */
1986262153Sluigi	ring->head = ring->cur = ring->tail;
1987262153Sluigi
1988262153Sluigi	/* third step: the new packets are sent on the tx ring
1989262153Sluigi	 * (which is actually the same ring)
1990262153Sluigi	 */
1991262153Sluigi	/* set tail to what the hw expects */
1992262153Sluigi	ring->tail = hw_kring->rtail;
1993262153Sluigi	nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
1994262153Sluigi	error = hw_kring->nm_sync(hw_kring, flags);
1995262153Sluigi
1996262153Sluigi	/* fourth step: now we are back the rx ring */
1997262153Sluigi	/* claim ownership on all hw owned bufs */
1998262153Sluigi	ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */
1999262153Sluigi	ring->tail = kring->rtail; /* restore saved value of tail, for safety */
2000262153Sluigi
2001262153Sluigi	/* fifth step: the user goes to sleep again, causing another rxsync */
2002262153Sluigi	netmap_vp_rxsync(na, ring_n, flags);
2003262153Sluigi	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2004262153Sluigi		NM_IFPNAME(na->ifp), ring_n,
2005262153Sluigi		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
2006262153Sluigi		ring->head, ring->cur, ring->tail,
2007262153Sluigi		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
2008262153Sluigi	mtx_unlock(&kring->q_lock);
2009262153Sluigi	return error;
2010262153Sluigi}
2011262153Sluigi
2012262153Sluigi
2013262153Sluigistatic int
2014262153Sluiginetmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
2015262153Sluigi{
2016262153Sluigi	struct netmap_bwrap_adapter *bna = na->na_private;
2017262153Sluigi	struct netmap_adapter *port_na = &bna->up.up;
2018262153Sluigi	if (tx == NR_TX || ring_n != 0)
2019262153Sluigi		return EINVAL;
2020262153Sluigi	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
2021262153Sluigi}
2022262153Sluigi
2023262153Sluigi
2024262153Sluigi/* attach a bridge wrapper to the 'real' device */
2025262153Sluigistatic int
2026262153Sluiginetmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
2027262153Sluigi{
2028262153Sluigi	struct netmap_bwrap_adapter *bna;
2029262153Sluigi	struct netmap_adapter *na;
2030262153Sluigi	struct netmap_adapter *hwna = NA(real);
2031262153Sluigi	struct netmap_adapter *hostna;
2032262153Sluigi	int error;
2033262153Sluigi
2034262153Sluigi
2035262153Sluigi	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
2036262153Sluigi	if (bna == NULL)
2037262153Sluigi		return ENOMEM;
2038262153Sluigi
2039262153Sluigi	na = &bna->up.up;
2040262153Sluigi	na->ifp = fake;
2041262153Sluigi	/* fill the ring data for the bwrap adapter with rx/tx meanings
2042262153Sluigi	 * swapped. The real cross-linking will be done during register,
2043262153Sluigi	 * when all the krings will have been created.
2044262153Sluigi	 */
2045262153Sluigi	na->num_rx_rings = hwna->num_tx_rings;
2046262153Sluigi	na->num_tx_rings = hwna->num_rx_rings;
2047262153Sluigi	na->num_tx_desc = hwna->num_rx_desc;
2048262153Sluigi	na->num_rx_desc = hwna->num_tx_desc;
2049262153Sluigi	na->nm_dtor = netmap_bwrap_dtor;
2050262153Sluigi	na->nm_register = netmap_bwrap_register;
2051262153Sluigi	// na->nm_txsync = netmap_bwrap_txsync;
2052262153Sluigi	// na->nm_rxsync = netmap_bwrap_rxsync;
2053262153Sluigi	na->nm_config = netmap_bwrap_config;
2054262153Sluigi	na->nm_krings_create = netmap_bwrap_krings_create;
2055262153Sluigi	na->nm_krings_delete = netmap_bwrap_krings_delete;
2056262153Sluigi	na->nm_notify = netmap_bwrap_notify;
2057262153Sluigi	na->nm_mem = hwna->nm_mem;
2058262153Sluigi	na->na_private = na; /* prevent NIOCREGIF */
2059262153Sluigi	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
2060262153Sluigi
2061262153Sluigi	bna->hwna = hwna;
2062262153Sluigi	netmap_adapter_get(hwna);
2063262153Sluigi	hwna->na_private = bna; /* weak reference */
2064262153Sluigi
2065262153Sluigi	if (hwna->na_flags & NAF_HOST_RINGS) {
2066262153Sluigi		na->na_flags |= NAF_HOST_RINGS;
2067262153Sluigi		hostna = &bna->host.up;
2068262153Sluigi		hostna->ifp = hwna->ifp;
2069262153Sluigi		hostna->num_tx_rings = 1;
2070262153Sluigi		hostna->num_tx_desc = hwna->num_rx_desc;
2071262153Sluigi		hostna->num_rx_rings = 1;
2072262153Sluigi		hostna->num_rx_desc = hwna->num_tx_desc;
2073262153Sluigi		// hostna->nm_txsync = netmap_bwrap_host_txsync;
2074262153Sluigi		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
2075262153Sluigi		hostna->nm_notify = netmap_bwrap_host_notify;
2076262153Sluigi		hostna->nm_mem = na->nm_mem;
2077262153Sluigi		hostna->na_private = bna;
2078262153Sluigi	}
2079262153Sluigi
2080262153Sluigi	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
2081262153Sluigi		fake->if_xname, real->if_xname,
2082262153Sluigi		na->num_tx_rings, na->num_tx_desc,
2083262153Sluigi		na->num_rx_rings, na->num_rx_desc);
2084262153Sluigi
2085262153Sluigi	error = netmap_attach_common(na);
2086262153Sluigi	if (error) {
2087262153Sluigi		netmap_adapter_put(hwna);
2088262153Sluigi		free(bna, M_DEVBUF);
2089262153Sluigi		return error;
2090262153Sluigi	}
2091262153Sluigi	return 0;
2092262153Sluigi}
2093262153Sluigi
2094262153Sluigi
2095262153Sluigivoid
2096262153Sluiginetmap_init_bridges(void)
2097262153Sluigi{
2098262153Sluigi	int i;
2099262153Sluigi	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
2100262153Sluigi	for (i = 0; i < NM_BRIDGES; i++)
2101262153Sluigi		BDG_RWINIT(&nm_bridges[i]);
2102262153Sluigi}
2103262153Sluigi#endif /* WITH_VALE */
2104