netmap_vale.c revision 267128
1259412Sluigi/*
2260368Sluigi * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
3259412Sluigi *
4259412Sluigi * Redistribution and use in source and binary forms, with or without
5259412Sluigi * modification, are permitted provided that the following conditions
6259412Sluigi * are met:
7259412Sluigi *   1. Redistributions of source code must retain the above copyright
8259412Sluigi *      notice, this list of conditions and the following disclaimer.
9259412Sluigi *   2. Redistributions in binary form must reproduce the above copyright
10259412Sluigi *      notice, this list of conditions and the following disclaimer in the
11259412Sluigi *      documentation and/or other materials provided with the distribution.
12259412Sluigi *
13259412Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14259412Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15259412Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16259412Sluigi * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17259412Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18259412Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19259412Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20259412Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21259412Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22259412Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23259412Sluigi * SUCH DAMAGE.
24259412Sluigi */
25259412Sluigi
26259412Sluigi
27259412Sluigi/*
28259412Sluigi * This module implements the VALE switch for netmap
29259412Sluigi
30259412Sluigi--- VALE SWITCH ---
31259412Sluigi
32259412SluigiNMG_LOCK() serializes all modifications to switches and ports.
33259412SluigiA switch cannot be deleted until all ports are gone.
34259412Sluigi
For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring a new port or deleting an existing
one, the lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.
42259412Sluigi
On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch.)
49259412Sluigi
50259412Sluigi */
51259412Sluigi
52259412Sluigi/*
53259412Sluigi * OS-specific code that is used only within this file.
54259412Sluigi * Other OS-specific code that must be accessed by drivers
55259412Sluigi * is present in netmap_kern.h
56259412Sluigi */
57259412Sluigi
58259412Sluigi#if defined(__FreeBSD__)
59259412Sluigi#include <sys/cdefs.h> /* prerequisite */
60259412Sluigi__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap_vale.c 267128 2014-06-05 21:12:41Z luigi $");
61259412Sluigi
62259412Sluigi#include <sys/types.h>
63259412Sluigi#include <sys/errno.h>
64259412Sluigi#include <sys/param.h>	/* defines used in kernel.h */
65259412Sluigi#include <sys/kernel.h>	/* types used in module initialization */
66259412Sluigi#include <sys/conf.h>	/* cdevsw struct, UID, GID */
67259412Sluigi#include <sys/sockio.h>
68259412Sluigi#include <sys/socketvar.h>	/* struct socket */
69259412Sluigi#include <sys/malloc.h>
70259412Sluigi#include <sys/poll.h>
71259412Sluigi#include <sys/rwlock.h>
72259412Sluigi#include <sys/socket.h> /* sockaddrs */
73259412Sluigi#include <sys/selinfo.h>
74259412Sluigi#include <sys/sysctl.h>
75259412Sluigi#include <net/if.h>
76259412Sluigi#include <net/if_var.h>
77259412Sluigi#include <net/bpf.h>		/* BIOCIMMEDIATE */
78259412Sluigi#include <machine/bus.h>	/* bus_dmamap_* */
79259412Sluigi#include <sys/endian.h>
80259412Sluigi#include <sys/refcount.h>
81259412Sluigi
82259412Sluigi
83259412Sluigi#define BDG_RWLOCK_T		struct rwlock // struct rwlock
84259412Sluigi
85259412Sluigi#define	BDG_RWINIT(b)		\
86259412Sluigi	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
87259412Sluigi#define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
88259412Sluigi#define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
89259412Sluigi#define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
90259412Sluigi#define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
91259412Sluigi#define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
92259412Sluigi#define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)
93259412Sluigi
94259412Sluigi
95259412Sluigi#elif defined(linux)
96259412Sluigi
97259412Sluigi#include "bsd_glue.h"
98259412Sluigi
99259412Sluigi#elif defined(__APPLE__)
100259412Sluigi
101259412Sluigi#warning OSX support is only partial
102259412Sluigi#include "osx_glue.h"
103259412Sluigi
104259412Sluigi#else
105259412Sluigi
106259412Sluigi#error	Unsupported platform
107259412Sluigi
108259412Sluigi#endif /* unsupported */
109259412Sluigi
110259412Sluigi/*
111259412Sluigi * common headers
112259412Sluigi */
113259412Sluigi
114259412Sluigi#include <net/netmap.h>
115259412Sluigi#include <dev/netmap/netmap_kern.h>
116259412Sluigi#include <dev/netmap/netmap_mem2.h>
117259412Sluigi
118259412Sluigi#ifdef WITH_VALE
119259412Sluigi
120259412Sluigi/*
121259412Sluigi * system parameters (most of them in netmap_kern.h)
122259412Sluigi * NM_NAME	prefix for switch port names, default "vale"
123259412Sluigi * NM_BDG_MAXPORTS	number of ports
124259412Sluigi * NM_BRIDGES	max number of switches in the system.
125259412Sluigi *	XXX should become a sysctl or tunable
126259412Sluigi *
127259412Sluigi * Switch ports are named valeX:Y where X is the switch name and Y
128259412Sluigi * is the port. If Y matches a physical interface name, the port is
129259412Sluigi * connected to a physical device.
130259412Sluigi *
131259412Sluigi * Unlike physical interfaces, switch ports use their own memory region
132259412Sluigi * for rings and buffers.
133259412Sluigi * The virtual interfaces use per-queue lock instead of core lock.
134259412Sluigi * In the tx loop, we aggregate traffic in batches to make all operations
135259412Sluigi * faster. The batch size is bridge_batch.
136259412Sluigi */
/* Rings per port assumed when sizing the per-ring destination queue
 * array in nm_alloc_bdgfwd().
 */
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
/* Number of entries in the ht[] array of struct nm_bridge. */
#define NM_BDG_HASH		1024	/* forwarding table entries */
/* Also the initial value of the bridge_batch variable. */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */
/* actual size of the tables: one full batch plus room for one more
 * maximum-length chain of buffers.
 */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
/* NM_FT_NULL terminates a list of slots in the ft; it is the value
 * stored in bq_head/bq_tail (uint16_t) of an empty struct nm_bdg_q.
 */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
/* Size of the static nm_bridges[] array. */
#define	NM_BRIDGES		8	/* number of bridges */
148259412Sluigi
149259412Sluigi
150259412Sluigi/*
151259412Sluigi * bridge_batch is set via sysctl to the max batch size to be
152259412Sluigi * used in the bridge. The actual value may be larger as the
153259412Sluigi * last packet in the block may overflow the size.
154259412Sluigi */
155259412Sluigiint bridge_batch = NM_BDG_BATCH; /* bridge batch size */
156259412SluigiSYSCTL_DECL(_dev_netmap);
157259412SluigiSYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
158259412Sluigi
159259412Sluigi
160259412Sluigistatic int bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp);
161259412Sluigistatic int bdg_netmap_reg(struct netmap_adapter *na, int onoff);
162259412Sluigistatic int netmap_bwrap_attach(struct ifnet *, struct ifnet *);
163259412Sluigistatic int netmap_bwrap_register(struct netmap_adapter *, int onoff);
164259412Sluigiint kern_netmap_regif(struct nmreq *nmr);
165259412Sluigi
/*
 * Descriptor of one destination queue: nm_bdg_q is used to build a
 * list for each output interface.
 *
 * bq_head and bq_tail are 16-bit values; nm_alloc_bdgfwd() sets both to
 * NM_FT_NULL (and bq_len to 0) to mark the list as empty.
 * bq_len counts output buffers, which can differ from the number of
 * input slots because buffers may be coalesced during the copy.
 */
struct nm_bdg_q {
	uint16_t bq_head, bq_tail;	/* first and last element of the list */
	uint32_t bq_len;		/* number of buffers */
};
176259412Sluigi
/*
 * One entry of the forwarding table kept in struct nm_bridge (ht[]).
 * XXX revise this
 */
struct nm_hash_ent {
	/* mac: the top 2 bytes are the epoch */
	uint64_t	mac, ports;
};
182259412Sluigi
/*
 * nm_bridge is a descriptor for a VALE switch.
 * Interfaces for a bridge are all in bdg_ports[].
 * The array has fixed size, an empty entry does not terminate
 * the search, but lookups only occur on attach/detach so we
 * don't mind if they are slow.
 *
 * The bridge is non blocking on the transmit ports: excess
 * packets are dropped if there is no room on the output port.
 *
 * bdg_lock protects accesses to the bdg_ports array.
 * This is a rw lock (or equivalent).
 *
 * A descriptor with bdg_active_ports == 0 is considered free and may be
 * reused by nm_find_bridge() for a new switch.
 */
struct nm_bridge {
	/* XXX what is the proper alignment/layout ? */
	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
	/* number of significant characters in bdg_basename; lookups in
	 * nm_find_bridge() compare exactly this many characters.
	 */
	int		bdg_namelen;
	uint32_t	bdg_active_ports; /* 0 means free */
	/* switch name, i.e. the part of the port name before the ':' */
	char		bdg_basename[IFNAMSIZ];

	/* Indexes of active ports (up to active_ports)
	 * and all other remaining ports.
	 * The first bdg_active_ports entries are the bdg_ports[] indexes
	 * currently in use; the following entries are the free indexes,
	 * from which new ports are taken. nm_find_bridge() initializes the
	 * array so that entry i holds the value i.
	 */
	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];

	/* adapters bound to the switch, indexed by port number;
	 * unused entries are NULL.
	 */
	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];


	/*
	 * The function to decide the destination port.
	 * It returns either of an index of the destination port,
	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
	 * forward this packet.  ring_nr is the source ring index, and the
	 * function may overwrite this value to forward this packet to a
	 * different ring index.
	 * nm_find_bridge() installs netmap_bdg_learning as the default when
	 * the switch is created; the original note says the function must
	 * be set through the bridge control entry point (netmap_bdg_ctl()).
	 * It is reset to NULL when the last port is detached.
	 */
	bdg_lookup_fn_t nm_bdg_lookup;

	/* the forwarding table, MAC+ports.
	 * XXX should be changed to an argument to be passed to
	 * the lookup function, and allocated on attach
	 * Zeroed by nm_find_bridge() when the switch is created.
	 */
	struct nm_hash_ent ht[NM_BDG_HASH];
};
228259412Sluigi
229259412Sluigi
/*
 * Table of all the VALE switches in the system.
 *
 * XXX in principle nm_bridges could be created dynamically
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 *
 * An entry with bdg_active_ports == 0 is free; nm_find_bridge() scans
 * this array by name and can hand out the first free entry.
 */
struct nm_bridge nm_bridges[NM_BRIDGES];
236259412Sluigi
237259412Sluigi
/*
 * Slightly optimized packet copy routine for non-overlapping buffers.
 *
 * _src	source buffer
 * _dst	destination buffer
 * l	number of bytes to copy; nothing is copied when l <= 0
 *
 * When l is a multiple of 64 and smaller than 1024 the data is moved
 * 64 bytes per iteration using 64-bit loads and stores, which is often
 * faster than dealing with odd sizes. Callers that want the fast path
 * round the length up to a multiple of 64 and must make sure there is
 * enough room in both buffers for the rounded length.
 *
 * Any other length (1024 bytes or more, or not a multiple of 64) is
 * handed to memcpy(), so that exactly l bytes are touched and the
 * routine never reads or writes past the requested size.
 *
 * NOTE(review): the fast path accesses the buffers as uint64_t, so it
 * presumably relies on them being suitably aligned -- confirm for new
 * callers.
 */
static inline void
pkt_copy(const void *_src, void *_dst, int l)
{
	const uint64_t *src = _src;
	uint64_t *dst = _dst;

	/* keep non-positive lengths a no-op (they must not reach memcpy) */
	if (l <= 0)
		return;
	if (unlikely(l >= 1024 || (l % 64) != 0)) {
		memcpy(dst, src, l);
		return;
	}
	/* here l is a positive multiple of 64: copy 8 x 8 bytes per round */
	for (; likely(l > 0); l -= 64) {
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
	}
}
266259412Sluigi
267259412Sluigi
268259412Sluigi/*
269259412Sluigi * locate a bridge among the existing ones.
270259412Sluigi * MUST BE CALLED WITH NMG_LOCK()
271259412Sluigi *
272259412Sluigi * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
273259412Sluigi * We assume that this is called with a name of at least NM_NAME chars.
274259412Sluigi */
275259412Sluigistatic struct nm_bridge *
276259412Sluiginm_find_bridge(const char *name, int create)
277259412Sluigi{
278259412Sluigi	int i, l, namelen;
279259412Sluigi	struct nm_bridge *b = NULL;
280259412Sluigi
281259412Sluigi	NMG_LOCK_ASSERT();
282259412Sluigi
283259412Sluigi	namelen = strlen(NM_NAME);	/* base length */
284259412Sluigi	l = name ? strlen(name) : 0;		/* actual length */
285259412Sluigi	if (l < namelen) {
286259412Sluigi		D("invalid bridge name %s", name ? name : NULL);
287259412Sluigi		return NULL;
288259412Sluigi	}
289259412Sluigi	for (i = namelen + 1; i < l; i++) {
290259412Sluigi		if (name[i] == ':') {
291259412Sluigi			namelen = i;
292259412Sluigi			break;
293259412Sluigi		}
294259412Sluigi	}
295259412Sluigi	if (namelen >= IFNAMSIZ)
296259412Sluigi		namelen = IFNAMSIZ;
297259412Sluigi	ND("--- prefix is '%.*s' ---", namelen, name);
298259412Sluigi
299259412Sluigi	/* lookup the name, remember empty slot if there is one */
300259412Sluigi	for (i = 0; i < NM_BRIDGES; i++) {
301259412Sluigi		struct nm_bridge *x = nm_bridges + i;
302259412Sluigi
303259412Sluigi		if (x->bdg_active_ports == 0) {
304259412Sluigi			if (create && b == NULL)
305259412Sluigi				b = x;	/* record empty slot */
306259412Sluigi		} else if (x->bdg_namelen != namelen) {
307259412Sluigi			continue;
308259412Sluigi		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
309259412Sluigi			ND("found '%.*s' at %d", namelen, name, i);
310259412Sluigi			b = x;
311259412Sluigi			break;
312259412Sluigi		}
313259412Sluigi	}
314259412Sluigi	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
315259412Sluigi		/* initialize the bridge */
316259412Sluigi		strncpy(b->bdg_basename, name, namelen);
317259412Sluigi		ND("create new bridge %s with ports %d", b->bdg_basename,
318259412Sluigi			b->bdg_active_ports);
319259412Sluigi		b->bdg_namelen = namelen;
320259412Sluigi		b->bdg_active_ports = 0;
321259412Sluigi		for (i = 0; i < NM_BDG_MAXPORTS; i++)
322259412Sluigi			b->bdg_port_index[i] = i;
323259412Sluigi		/* set the default function */
324259412Sluigi		b->nm_bdg_lookup = netmap_bdg_learning;
325259412Sluigi		/* reset the MAC address table */
326259412Sluigi		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
327259412Sluigi	}
328259412Sluigi	return b;
329259412Sluigi}
330259412Sluigi
331259412Sluigi
332259412Sluigi/*
333259412Sluigi * Free the forwarding tables for rings attached to switch ports.
334259412Sluigi */
335259412Sluigistatic void
336259412Sluiginm_free_bdgfwd(struct netmap_adapter *na)
337259412Sluigi{
338259412Sluigi	int nrings, i;
339259412Sluigi	struct netmap_kring *kring;
340259412Sluigi
341259412Sluigi	NMG_LOCK_ASSERT();
342260368Sluigi	nrings = na->num_tx_rings;
343260368Sluigi	kring = na->tx_rings;
344259412Sluigi	for (i = 0; i < nrings; i++) {
345259412Sluigi		if (kring[i].nkr_ft) {
346259412Sluigi			free(kring[i].nkr_ft, M_DEVBUF);
347259412Sluigi			kring[i].nkr_ft = NULL; /* protect from freeing twice */
348259412Sluigi		}
349259412Sluigi	}
350259412Sluigi}
351259412Sluigi
352259412Sluigi
353259412Sluigi/*
354259412Sluigi * Allocate the forwarding tables for the rings attached to the bridge ports.
355259412Sluigi */
356259412Sluigistatic int
357259412Sluiginm_alloc_bdgfwd(struct netmap_adapter *na)
358259412Sluigi{
359259412Sluigi	int nrings, l, i, num_dstq;
360259412Sluigi	struct netmap_kring *kring;
361259412Sluigi
362259412Sluigi	NMG_LOCK_ASSERT();
363259412Sluigi	/* all port:rings + broadcast */
364259412Sluigi	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
365259412Sluigi	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
366259412Sluigi	l += sizeof(struct nm_bdg_q) * num_dstq;
367259412Sluigi	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
368259412Sluigi
369261909Sluigi	nrings = netmap_real_tx_rings(na);
370259412Sluigi	kring = na->tx_rings;
371259412Sluigi	for (i = 0; i < nrings; i++) {
372259412Sluigi		struct nm_bdg_fwd *ft;
373259412Sluigi		struct nm_bdg_q *dstq;
374259412Sluigi		int j;
375259412Sluigi
376259412Sluigi		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
377259412Sluigi		if (!ft) {
378259412Sluigi			nm_free_bdgfwd(na);
379259412Sluigi			return ENOMEM;
380259412Sluigi		}
381259412Sluigi		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
382259412Sluigi		for (j = 0; j < num_dstq; j++) {
383259412Sluigi			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
384259412Sluigi			dstq[j].bq_len = 0;
385259412Sluigi		}
386259412Sluigi		kring[i].nkr_ft = ft;
387259412Sluigi	}
388259412Sluigi	return 0;
389259412Sluigi}
390259412Sluigi
391259412Sluigi
392259412Sluigistatic void
393259412Sluiginetmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
394259412Sluigi{
395259412Sluigi	int s_hw = hw, s_sw = sw;
396259412Sluigi	int i, lim =b->bdg_active_ports;
397259412Sluigi	uint8_t tmp[NM_BDG_MAXPORTS];
398259412Sluigi
399259412Sluigi	/*
400259412Sluigi	New algorithm:
401259412Sluigi	make a copy of bdg_port_index;
402259412Sluigi	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
403259412Sluigi	in the array of bdg_port_index, replacing them with
404259412Sluigi	entries from the bottom of the array;
405259412Sluigi	decrement bdg_active_ports;
406259412Sluigi	acquire BDG_WLOCK() and copy back the array.
407259412Sluigi	 */
408259412Sluigi
409261909Sluigi	if (netmap_verbose)
410261909Sluigi		D("detach %d and %d (lim %d)", hw, sw, lim);
411259412Sluigi	/* make a copy of the list of active ports, update it,
412259412Sluigi	 * and then copy back within BDG_WLOCK().
413259412Sluigi	 */
414259412Sluigi	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
415259412Sluigi	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
416259412Sluigi		if (hw >= 0 && tmp[i] == hw) {
417259412Sluigi			ND("detach hw %d at %d", hw, i);
418259412Sluigi			lim--; /* point to last active port */
419259412Sluigi			tmp[i] = tmp[lim]; /* swap with i */
420259412Sluigi			tmp[lim] = hw;	/* now this is inactive */
421259412Sluigi			hw = -1;
422259412Sluigi		} else if (sw >= 0 && tmp[i] == sw) {
423259412Sluigi			ND("detach sw %d at %d", sw, i);
424259412Sluigi			lim--;
425259412Sluigi			tmp[i] = tmp[lim];
426259412Sluigi			tmp[lim] = sw;
427259412Sluigi			sw = -1;
428259412Sluigi		} else {
429259412Sluigi			i++;
430259412Sluigi		}
431259412Sluigi	}
432259412Sluigi	if (hw >= 0 || sw >= 0) {
433259412Sluigi		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
434259412Sluigi	}
435259412Sluigi
436259412Sluigi	BDG_WLOCK(b);
437259412Sluigi	b->bdg_ports[s_hw] = NULL;
438259412Sluigi	if (s_sw >= 0) {
439259412Sluigi		b->bdg_ports[s_sw] = NULL;
440259412Sluigi	}
441259412Sluigi	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
442259412Sluigi	b->bdg_active_ports = lim;
443259412Sluigi	BDG_WUNLOCK(b);
444259412Sluigi
445259412Sluigi	ND("now %d active ports", lim);
446259412Sluigi	if (lim == 0) {
447259412Sluigi		ND("marking bridge %s as free", b->bdg_basename);
448259412Sluigi		b->nm_bdg_lookup = NULL;
449259412Sluigi	}
450259412Sluigi}
451259412Sluigi
452260368Sluigi
453259412Sluigistatic void
454259412Sluiginetmap_adapter_vp_dtor(struct netmap_adapter *na)
455259412Sluigi{
456259412Sluigi	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
457259412Sluigi	struct nm_bridge *b = vpna->na_bdg;
458259412Sluigi	struct ifnet *ifp = na->ifp;
459259412Sluigi
460259412Sluigi	ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount);
461259412Sluigi
462259412Sluigi	if (b) {
463259412Sluigi		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
464259412Sluigi	}
465259412Sluigi
466259412Sluigi	bzero(ifp, sizeof(*ifp));
467259412Sluigi	free(ifp, M_DEVBUF);
468259412Sluigi	na->ifp = NULL;
469259412Sluigi}
470259412Sluigi
471260368Sluigi
472260368Sluigi/* Try to get a reference to a netmap adapter attached to a VALE switch.
473260368Sluigi * If the adapter is found (or is created), this function returns 0, a
474260368Sluigi * non NULL pointer is returned into *na, and the caller holds a
475260368Sluigi * reference to the adapter.
476260368Sluigi * If an adapter is not found, then no reference is grabbed and the
477260368Sluigi * function returns an error code, or 0 if there is just a VALE prefix
478260368Sluigi * mismatch. Therefore the caller holds a reference when
479260368Sluigi * (*na != NULL && return == 0).
480260368Sluigi */
481259412Sluigiint
482259412Sluiginetmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
483259412Sluigi{
484259412Sluigi	const char *name = nmr->nr_name;
485259412Sluigi	struct ifnet *ifp;
486259412Sluigi	int error = 0;
487259412Sluigi	struct netmap_adapter *ret;
488259412Sluigi	struct netmap_vp_adapter *vpna;
489259412Sluigi	struct nm_bridge *b;
490259412Sluigi	int i, j, cand = -1, cand2 = -1;
491259412Sluigi	int needed;
492259412Sluigi
493259412Sluigi	*na = NULL;     /* default return value */
494259412Sluigi
495259412Sluigi	/* first try to see if this is a bridge port. */
496259412Sluigi	NMG_LOCK_ASSERT();
497259412Sluigi	if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
498259412Sluigi		return 0;  /* no error, but no VALE prefix */
499259412Sluigi	}
500259412Sluigi
501259412Sluigi	b = nm_find_bridge(name, create);
502259412Sluigi	if (b == NULL) {
503259412Sluigi		D("no bridges available for '%s'", name);
504260700Sluigi		return (create ? ENOMEM : ENXIO);
505259412Sluigi	}
506259412Sluigi
507259412Sluigi	/* Now we are sure that name starts with the bridge's name,
508259412Sluigi	 * lookup the port in the bridge. We need to scan the entire
509259412Sluigi	 * list. It is not important to hold a WLOCK on the bridge
510259412Sluigi	 * during the search because NMG_LOCK already guarantees
511259412Sluigi	 * that there are no other possible writers.
512259412Sluigi	 */
513259412Sluigi
514259412Sluigi	/* lookup in the local list of ports */
515259412Sluigi	for (j = 0; j < b->bdg_active_ports; j++) {
516259412Sluigi		i = b->bdg_port_index[j];
517259412Sluigi		vpna = b->bdg_ports[i];
518259412Sluigi		// KASSERT(na != NULL);
519259412Sluigi		ifp = vpna->up.ifp;
520259412Sluigi		/* XXX make sure the name only contains one : */
521259412Sluigi		if (!strcmp(NM_IFPNAME(ifp), name)) {
522259412Sluigi			netmap_adapter_get(&vpna->up);
523259412Sluigi			ND("found existing if %s refs %d", name,
524259412Sluigi				vpna->na_bdg_refcount);
525259412Sluigi			*na = (struct netmap_adapter *)vpna;
526259412Sluigi			return 0;
527259412Sluigi		}
528259412Sluigi	}
529259412Sluigi	/* not found, should we create it? */
530259412Sluigi	if (!create)
531259412Sluigi		return ENXIO;
532259412Sluigi	/* yes we should, see if we have space to attach entries */
533259412Sluigi	needed = 2; /* in some cases we only need 1 */
534259412Sluigi	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
535259412Sluigi		D("bridge full %d, cannot create new port", b->bdg_active_ports);
536260700Sluigi		return ENOMEM;
537259412Sluigi	}
538259412Sluigi	/* record the next two ports available, but do not allocate yet */
539259412Sluigi	cand = b->bdg_port_index[b->bdg_active_ports];
540259412Sluigi	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
541259412Sluigi	ND("+++ bridge %s port %s used %d avail %d %d",
542259412Sluigi		b->bdg_basename, name, b->bdg_active_ports, cand, cand2);
543259412Sluigi
544259412Sluigi	/*
545259412Sluigi	 * try see if there is a matching NIC with this name
546259412Sluigi	 * (after the bridge's name)
547259412Sluigi	 */
548259412Sluigi	ifp = ifunit_ref(name + b->bdg_namelen + 1);
549259412Sluigi	if (!ifp) { /* this is a virtual port */
550259412Sluigi		if (nmr->nr_cmd) {
551259412Sluigi			/* nr_cmd must be 0 for a virtual port */
552259412Sluigi			return EINVAL;
553259412Sluigi		}
554259412Sluigi
555259412Sluigi	 	/* create a struct ifnet for the new port.
556259412Sluigi		 * need M_NOWAIT as we are under nma_lock
557259412Sluigi		 */
558259412Sluigi		ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
559259412Sluigi		if (!ifp)
560259412Sluigi			return ENOMEM;
561259412Sluigi
562259412Sluigi		strcpy(ifp->if_xname, name);
563259412Sluigi		/* bdg_netmap_attach creates a struct netmap_adapter */
564259412Sluigi		error = bdg_netmap_attach(nmr, ifp);
565259412Sluigi		if (error) {
566259412Sluigi			D("error %d", error);
567259412Sluigi			free(ifp, M_DEVBUF);
568259412Sluigi			return error;
569259412Sluigi		}
570259412Sluigi		ret = NA(ifp);
571259412Sluigi		cand2 = -1;	/* only need one port */
572259412Sluigi	} else {  /* this is a NIC */
573259412Sluigi		struct ifnet *fake_ifp;
574259412Sluigi
575259412Sluigi		error = netmap_get_hw_na(ifp, &ret);
576259412Sluigi		if (error || ret == NULL)
577259412Sluigi			goto out;
578259412Sluigi
579259412Sluigi		/* make sure the NIC is not already in use */
580259412Sluigi		if (NETMAP_OWNED_BY_ANY(ret)) {
581259412Sluigi			D("NIC %s busy, cannot attach to bridge",
582259412Sluigi				NM_IFPNAME(ifp));
583260700Sluigi			error = EBUSY;
584259412Sluigi			goto out;
585259412Sluigi		}
586259412Sluigi		/* create a fake interface */
587259412Sluigi		fake_ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
588259412Sluigi		if (!fake_ifp) {
589259412Sluigi			error = ENOMEM;
590259412Sluigi			goto out;
591259412Sluigi		}
592259412Sluigi		strcpy(fake_ifp->if_xname, name);
593259412Sluigi		error = netmap_bwrap_attach(fake_ifp, ifp);
594259412Sluigi		if (error) {
595259412Sluigi			free(fake_ifp, M_DEVBUF);
596259412Sluigi			goto out;
597259412Sluigi		}
598259412Sluigi		ret = NA(fake_ifp);
599259412Sluigi		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
600259412Sluigi			cand2 = -1; /* only need one port */
601259412Sluigi		if_rele(ifp);
602259412Sluigi	}
603259412Sluigi	vpna = (struct netmap_vp_adapter *)ret;
604259412Sluigi
605259412Sluigi	BDG_WLOCK(b);
606259412Sluigi	vpna->bdg_port = cand;
607259412Sluigi	ND("NIC  %p to bridge port %d", vpna, cand);
608259412Sluigi	/* bind the port to the bridge (virtual ports are not active) */
609259412Sluigi	b->bdg_ports[cand] = vpna;
610259412Sluigi	vpna->na_bdg = b;
611259412Sluigi	b->bdg_active_ports++;
612259412Sluigi	if (cand2 >= 0) {
613259412Sluigi		struct netmap_vp_adapter *hostna = vpna + 1;
614259412Sluigi		/* also bind the host stack to the bridge */
615259412Sluigi		b->bdg_ports[cand2] = hostna;
616259412Sluigi		hostna->bdg_port = cand2;
617259412Sluigi		hostna->na_bdg = b;
618259412Sluigi		b->bdg_active_ports++;
619259412Sluigi		ND("host %p to bridge port %d", hostna, cand2);
620259412Sluigi	}
621259412Sluigi	ND("if %s refs %d", name, vpna->up.na_refcount);
622259412Sluigi	BDG_WUNLOCK(b);
623259412Sluigi	*na = ret;
624259412Sluigi	netmap_adapter_get(ret);
625259412Sluigi	return 0;
626259412Sluigi
627259412Sluigiout:
628259412Sluigi	if_rele(ifp);
629259412Sluigi
630259412Sluigi	return error;
631259412Sluigi}
632259412Sluigi
633259412Sluigi
634259412Sluigi/* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
635259412Sluigistatic int
636259412Sluiginm_bdg_attach(struct nmreq *nmr)
637259412Sluigi{
638259412Sluigi	struct netmap_adapter *na;
639259412Sluigi	struct netmap_if *nifp;
640259412Sluigi	struct netmap_priv_d *npriv;
641259412Sluigi	struct netmap_bwrap_adapter *bna;
642259412Sluigi	int error;
643259412Sluigi
644259412Sluigi	npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
645259412Sluigi	if (npriv == NULL)
646259412Sluigi		return ENOMEM;
647260700Sluigi
648259412Sluigi	NMG_LOCK();
649260700Sluigi
650260368Sluigi	error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
651259412Sluigi	if (error) /* no device, or another bridge or user owns the device */
652259412Sluigi		goto unlock_exit;
653260700Sluigi
654260368Sluigi	if (na == NULL) { /* VALE prefix missing */
655259412Sluigi		error = EINVAL;
656260368Sluigi		goto unlock_exit;
657259412Sluigi	}
658259412Sluigi
659259412Sluigi	if (na->active_fds > 0) { /* already registered */
660259412Sluigi		error = EBUSY;
661259412Sluigi		goto unref_exit;
662259412Sluigi	}
663259412Sluigi
664261909Sluigi	nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error);
665259412Sluigi	if (!nifp) {
666259412Sluigi		goto unref_exit;
667259412Sluigi	}
668259412Sluigi
669259412Sluigi	bna = (struct netmap_bwrap_adapter*)na;
670259412Sluigi	bna->na_kpriv = npriv;
671259412Sluigi	NMG_UNLOCK();
672259412Sluigi	ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp));
673259412Sluigi	return 0;
674259412Sluigi
675259412Sluigiunref_exit:
676259412Sluigi	netmap_adapter_put(na);
677259412Sluigiunlock_exit:
678259412Sluigi	NMG_UNLOCK();
679259412Sluigi	bzero(npriv, sizeof(*npriv));
680259412Sluigi	free(npriv, M_DEVBUF);
681259412Sluigi	return error;
682259412Sluigi}
683259412Sluigi
684260368Sluigi
685259412Sluigistatic int
686259412Sluiginm_bdg_detach(struct nmreq *nmr)
687259412Sluigi{
688259412Sluigi	struct netmap_adapter *na;
689259412Sluigi	int error;
690259412Sluigi	struct netmap_bwrap_adapter *bna;
691259412Sluigi	int last_instance;
692259412Sluigi
693259412Sluigi	NMG_LOCK();
694260368Sluigi	error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
695259412Sluigi	if (error) { /* no device, or another bridge or user owns the device */
696259412Sluigi		goto unlock_exit;
697259412Sluigi	}
698260700Sluigi
699260368Sluigi	if (na == NULL) { /* VALE prefix missing */
700259412Sluigi		error = EINVAL;
701260368Sluigi		goto unlock_exit;
702259412Sluigi	}
703260368Sluigi
704259412Sluigi	bna = (struct netmap_bwrap_adapter *)na;
705259412Sluigi
706259412Sluigi	if (na->active_fds == 0) { /* not registered */
707259412Sluigi		error = EINVAL;
708259412Sluigi		goto unref_exit;
709259412Sluigi	}
710259412Sluigi
711259412Sluigi	last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */
712259412Sluigi	if (!last_instance) {
713259412Sluigi		D("--- error, trying to detach an entry with active mmaps");
714259412Sluigi		error = EINVAL;
715259412Sluigi	} else {
716259412Sluigi		struct netmap_priv_d *npriv = bna->na_kpriv;
717259412Sluigi
718259412Sluigi		bna->na_kpriv = NULL;
719259412Sluigi		D("deleting priv");
720259412Sluigi
721259412Sluigi		bzero(npriv, sizeof(*npriv));
722259412Sluigi		free(npriv, M_DEVBUF);
723259412Sluigi	}
724259412Sluigi
725259412Sluigiunref_exit:
726259412Sluigi	netmap_adapter_put(na);
727259412Sluigiunlock_exit:
728259412Sluigi	NMG_UNLOCK();
729259412Sluigi	return error;
730259412Sluigi
731259412Sluigi}
732259412Sluigi
733259412Sluigi
734259412Sluigi/* exported to kernel callers, e.g. OVS ?
735259412Sluigi * Entry point.
736259412Sluigi * Called without NMG_LOCK.
737259412Sluigi */
738259412Sluigiint
739259412Sluiginetmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
740259412Sluigi{
741259412Sluigi	struct nm_bridge *b;
742259412Sluigi	struct netmap_adapter *na;
743259412Sluigi	struct netmap_vp_adapter *vpna;
744259412Sluigi	struct ifnet *iter;
745259412Sluigi	char *name = nmr->nr_name;
746259412Sluigi	int cmd = nmr->nr_cmd, namelen = strlen(name);
747259412Sluigi	int error = 0, i, j;
748259412Sluigi
749259412Sluigi	switch (cmd) {
750259412Sluigi	case NETMAP_BDG_ATTACH:
751259412Sluigi		error = nm_bdg_attach(nmr);
752259412Sluigi		break;
753259412Sluigi
754259412Sluigi	case NETMAP_BDG_DETACH:
755259412Sluigi		error = nm_bdg_detach(nmr);
756259412Sluigi		break;
757259412Sluigi
758259412Sluigi	case NETMAP_BDG_LIST:
759259412Sluigi		/* this is used to enumerate bridges and ports */
760259412Sluigi		if (namelen) { /* look up indexes of bridge and port */
761259412Sluigi			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
762259412Sluigi				error = EINVAL;
763259412Sluigi				break;
764259412Sluigi			}
765259412Sluigi			NMG_LOCK();
766259412Sluigi			b = nm_find_bridge(name, 0 /* don't create */);
767259412Sluigi			if (!b) {
768259412Sluigi				error = ENOENT;
769259412Sluigi				NMG_UNLOCK();
770259412Sluigi				break;
771259412Sluigi			}
772259412Sluigi
773259412Sluigi			error = ENOENT;
774259412Sluigi			for (j = 0; j < b->bdg_active_ports; j++) {
775259412Sluigi				i = b->bdg_port_index[j];
776259412Sluigi				vpna = b->bdg_ports[i];
777259412Sluigi				if (vpna == NULL) {
778259412Sluigi					D("---AAAAAAAAARGH-------");
779259412Sluigi					continue;
780259412Sluigi				}
781259412Sluigi				iter = vpna->up.ifp;
782259412Sluigi				/* the former and the latter identify a
783259412Sluigi				 * virtual port and a NIC, respectively
784259412Sluigi				 */
785259412Sluigi				if (!strcmp(iter->if_xname, name)) {
786259412Sluigi					/* bridge index */
787259412Sluigi					nmr->nr_arg1 = b - nm_bridges;
788259412Sluigi					nmr->nr_arg2 = i; /* port index */
789259412Sluigi					error = 0;
790259412Sluigi					break;
791259412Sluigi				}
792259412Sluigi			}
793259412Sluigi			NMG_UNLOCK();
794259412Sluigi		} else {
795259412Sluigi			/* return the first non-empty entry starting from
796259412Sluigi			 * bridge nr_arg1 and port nr_arg2.
797259412Sluigi			 *
798259412Sluigi			 * Users can detect the end of the same bridge by
799259412Sluigi			 * seeing the new and old value of nr_arg1, and can
800259412Sluigi			 * detect the end of all the bridge by error != 0
801259412Sluigi			 */
802259412Sluigi			i = nmr->nr_arg1;
803259412Sluigi			j = nmr->nr_arg2;
804259412Sluigi
805259412Sluigi			NMG_LOCK();
806259412Sluigi			for (error = ENOENT; i < NM_BRIDGES; i++) {
807259412Sluigi				b = nm_bridges + i;
808259412Sluigi				if (j >= b->bdg_active_ports) {
809259412Sluigi					j = 0; /* following bridges scan from 0 */
810259412Sluigi					continue;
811259412Sluigi				}
812259412Sluigi				nmr->nr_arg1 = i;
813259412Sluigi				nmr->nr_arg2 = j;
814259412Sluigi				j = b->bdg_port_index[j];
815259412Sluigi				vpna = b->bdg_ports[j];
816259412Sluigi				iter = vpna->up.ifp;
817259412Sluigi				strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
818259412Sluigi				error = 0;
819259412Sluigi				break;
820259412Sluigi			}
821259412Sluigi			NMG_UNLOCK();
822259412Sluigi		}
823259412Sluigi		break;
824259412Sluigi
825259412Sluigi	case NETMAP_BDG_LOOKUP_REG:
826259412Sluigi		/* register a lookup function to the given bridge.
827259412Sluigi		 * nmr->nr_name may be just bridge's name (including ':'
828259412Sluigi		 * if it is not just NM_NAME).
829259412Sluigi		 */
830259412Sluigi		if (!func) {
831259412Sluigi			error = EINVAL;
832259412Sluigi			break;
833259412Sluigi		}
834259412Sluigi		NMG_LOCK();
835259412Sluigi		b = nm_find_bridge(name, 0 /* don't create */);
836259412Sluigi		if (!b) {
837259412Sluigi			error = EINVAL;
838259412Sluigi		} else {
839259412Sluigi			b->nm_bdg_lookup = func;
840259412Sluigi		}
841259412Sluigi		NMG_UNLOCK();
842259412Sluigi		break;
843259412Sluigi
844261909Sluigi	case NETMAP_BDG_VNET_HDR:
845261909Sluigi		/* Valid lengths for the virtio-net header are 0 (no header),
846261909Sluigi		   10 and 12. */
847261909Sluigi		if (nmr->nr_arg1 != 0 &&
848261909Sluigi			nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) &&
849261909Sluigi				nmr->nr_arg1 != 12) {
850261909Sluigi			error = EINVAL;
851261909Sluigi			break;
852261909Sluigi		}
853259412Sluigi		NMG_LOCK();
854259412Sluigi		error = netmap_get_bdg_na(nmr, &na, 0);
855260368Sluigi		if (na && !error) {
856259412Sluigi			vpna = (struct netmap_vp_adapter *)na;
857261909Sluigi			vpna->virt_hdr_len = nmr->nr_arg1;
858261909Sluigi			if (vpna->virt_hdr_len)
859261909Sluigi				vpna->mfs = NETMAP_BDG_BUF_SIZE(na->nm_mem);
860261909Sluigi			D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna);
861260368Sluigi			netmap_adapter_put(na);
862259412Sluigi		}
863259412Sluigi		NMG_UNLOCK();
864259412Sluigi		break;
865259412Sluigi
866259412Sluigi	default:
867259412Sluigi		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
868259412Sluigi		error = EINVAL;
869259412Sluigi		break;
870259412Sluigi	}
871259412Sluigi	return error;
872259412Sluigi}
873259412Sluigi
874259412Sluigistatic int
875259412Sluiginetmap_vp_krings_create(struct netmap_adapter *na)
876259412Sluigi{
877261909Sluigi	u_int tailroom;
878259412Sluigi	int error, i;
879259412Sluigi	uint32_t *leases;
880261909Sluigi	u_int nrx = netmap_real_rx_rings(na);
881259412Sluigi
882259412Sluigi	/*
883259412Sluigi	 * Leases are attached to RX rings on vale ports
884259412Sluigi	 */
885259412Sluigi	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
886259412Sluigi
887261909Sluigi	error = netmap_krings_create(na, tailroom);
888259412Sluigi	if (error)
889259412Sluigi		return error;
890259412Sluigi
891259412Sluigi	leases = na->tailroom;
892259412Sluigi
893259412Sluigi	for (i = 0; i < nrx; i++) { /* Receive rings */
894259412Sluigi		na->rx_rings[i].nkr_leases = leases;
895259412Sluigi		leases += na->num_rx_desc;
896259412Sluigi	}
897259412Sluigi
898259412Sluigi	error = nm_alloc_bdgfwd(na);
899259412Sluigi	if (error) {
900259412Sluigi		netmap_krings_delete(na);
901259412Sluigi		return error;
902259412Sluigi	}
903259412Sluigi
904259412Sluigi	return 0;
905259412Sluigi}
906259412Sluigi
907260368Sluigi
/*
 * Counterpart of netmap_vp_krings_create(): release the forwarding
 * work area first, then the krings themselves.
 */
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);		/* per-ring forwarding tables */
	netmap_krings_delete(na);	/* krings (and their tailroom) */
}
914259412Sluigi
915259412Sluigi
916259412Sluigistatic int
917259412Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
918259412Sluigi	struct netmap_vp_adapter *na, u_int ring_nr);
919259412Sluigi
920259412Sluigi
921259412Sluigi/*
922259412Sluigi * Grab packets from a kring, move them into the ft structure
923259412Sluigi * associated to the tx (input) port. Max one instance per port,
924259412Sluigi * filtered on input (ioctl, poll or XXX).
925259412Sluigi * Returns the next position in the ring.
926259412Sluigi */
927259412Sluigistatic int
928259412Sluiginm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
929259412Sluigi	struct netmap_kring *kring, u_int end)
930259412Sluigi{
931259412Sluigi	struct netmap_ring *ring = kring->ring;
932259412Sluigi	struct nm_bdg_fwd *ft;
933259412Sluigi	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
934259412Sluigi	u_int ft_i = 0;	/* start from 0 */
935259412Sluigi	u_int frags = 1; /* how many frags ? */
936259412Sluigi	struct nm_bridge *b = na->na_bdg;
937259412Sluigi
938259412Sluigi	/* To protect against modifications to the bridge we acquire a
939259412Sluigi	 * shared lock, waiting if we can sleep (if the source port is
940259412Sluigi	 * attached to a user process) or with a trylock otherwise (NICs).
941259412Sluigi	 */
942259412Sluigi	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
943259412Sluigi	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
944259412Sluigi		BDG_RLOCK(b);
945259412Sluigi	else if (!BDG_RTRYLOCK(b))
946259412Sluigi		return 0;
947259412Sluigi	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
948259412Sluigi	ft = kring->nkr_ft;
949259412Sluigi
950259412Sluigi	for (; likely(j != end); j = nm_next(j, lim)) {
951259412Sluigi		struct netmap_slot *slot = &ring->slot[j];
952259412Sluigi		char *buf;
953259412Sluigi
954259412Sluigi		ft[ft_i].ft_len = slot->len;
955259412Sluigi		ft[ft_i].ft_flags = slot->flags;
956259412Sluigi
957259412Sluigi		ND("flags is 0x%x", slot->flags);
958259412Sluigi		/* this slot goes into a list so initialize the link field */
959259412Sluigi		ft[ft_i].ft_next = NM_FT_NULL;
960259412Sluigi		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
961259412Sluigi			(void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
962259487Sluigi		__builtin_prefetch(buf);
963259412Sluigi		++ft_i;
964259412Sluigi		if (slot->flags & NS_MOREFRAG) {
965259412Sluigi			frags++;
966259412Sluigi			continue;
967259412Sluigi		}
968259412Sluigi		if (unlikely(netmap_verbose && frags > 1))
969259412Sluigi			RD(5, "%d frags at %d", frags, ft_i - frags);
970259412Sluigi		ft[ft_i - frags].ft_frags = frags;
971259412Sluigi		frags = 1;
972259412Sluigi		if (unlikely((int)ft_i >= bridge_batch))
973259412Sluigi			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
974259412Sluigi	}
975259412Sluigi	if (frags > 1) {
976259412Sluigi		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
977259412Sluigi		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
978259412Sluigi		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
979259412Sluigi		ft[ft_i - frags].ft_frags = frags - 1;
980259412Sluigi	}
981259412Sluigi	if (ft_i)
982259412Sluigi		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
983259412Sluigi	BDG_RUNLOCK(b);
984259412Sluigi	return j;
985259412Sluigi}
986259412Sluigi
987259412Sluigi
988259412Sluigi/* ----- FreeBSD if_bridge hash function ------- */
989259412Sluigi
990259412Sluigi/*
991259412Sluigi * The following hash function is adapted from "Hash Functions" by Bob Jenkins
992259412Sluigi * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
993259412Sluigi *
994259412Sluigi * http://www.burtleburtle.net/bob/hash/spooky.html
995259412Sluigi */
996259412Sluigi#define mix(a, b, c)                                                    \
997259412Sluigido {                                                                    \
998259412Sluigi        a -= b; a -= c; a ^= (c >> 13);                                 \
999259412Sluigi        b -= c; b -= a; b ^= (a << 8);                                  \
1000259412Sluigi        c -= a; c -= b; c ^= (b >> 13);                                 \
1001259412Sluigi        a -= b; a -= c; a ^= (c >> 12);                                 \
1002259412Sluigi        b -= c; b -= a; b ^= (a << 16);                                 \
1003259412Sluigi        c -= a; c -= b; c ^= (b >> 5);                                  \
1004259412Sluigi        a -= b; a -= c; a ^= (c >> 3);                                  \
1005259412Sluigi        b -= c; b -= a; b ^= (a << 10);                                 \
1006259412Sluigi        c -= a; c -= b; c ^= (b >> 15);                                 \
1007259412Sluigi} while (/*CONSTCOND*/0)
1008259412Sluigi
1009260368Sluigi
1010259412Sluigistatic __inline uint32_t
1011259412Sluiginm_bridge_rthash(const uint8_t *addr)
1012259412Sluigi{
1013259412Sluigi        uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
1014259412Sluigi
1015259412Sluigi        b += addr[5] << 8;
1016259412Sluigi        b += addr[4];
1017259412Sluigi        a += addr[3] << 24;
1018259412Sluigi        a += addr[2] << 16;
1019259412Sluigi        a += addr[1] << 8;
1020259412Sluigi        a += addr[0];
1021259412Sluigi
1022259412Sluigi        mix(a, b, c);
1023259412Sluigi#define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
1024259412Sluigi        return (c & BRIDGE_RTHASH_MASK);
1025259412Sluigi}
1026259412Sluigi
1027259412Sluigi#undef mix
1028259412Sluigi
1029259412Sluigi
1030259412Sluigistatic int
1031259412Sluigibdg_netmap_reg(struct netmap_adapter *na, int onoff)
1032259412Sluigi{
1033259412Sluigi	struct netmap_vp_adapter *vpna =
1034259412Sluigi		(struct netmap_vp_adapter*)na;
1035259412Sluigi	struct ifnet *ifp = na->ifp;
1036259412Sluigi
1037259412Sluigi	/* the interface is already attached to the bridge,
1038259412Sluigi	 * so we only need to toggle IFCAP_NETMAP.
1039259412Sluigi	 */
1040259412Sluigi	BDG_WLOCK(vpna->na_bdg);
1041259412Sluigi	if (onoff) {
1042259412Sluigi		ifp->if_capenable |= IFCAP_NETMAP;
1043259412Sluigi	} else {
1044259412Sluigi		ifp->if_capenable &= ~IFCAP_NETMAP;
1045259412Sluigi	}
1046259412Sluigi	BDG_WUNLOCK(vpna->na_bdg);
1047259412Sluigi	return 0;
1048259412Sluigi}
1049259412Sluigi
1050259412Sluigi
1051259412Sluigi/*
1052259412Sluigi * Lookup function for a learning bridge.
1053259412Sluigi * Update the hash table with the source address,
1054259412Sluigi * and then returns the destination port index, and the
1055259412Sluigi * ring in *dst_ring (at the moment, always use ring 0)
1056259412Sluigi */
1057259412Sluigiu_int
1058259412Sluiginetmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
1059259412Sluigi		struct netmap_vp_adapter *na)
1060259412Sluigi{
1061259412Sluigi	struct nm_hash_ent *ht = na->na_bdg->ht;
1062259412Sluigi	uint32_t sh, dh;
1063259412Sluigi	u_int dst, mysrc = na->bdg_port;
1064259412Sluigi	uint64_t smac, dmac;
1065259412Sluigi
1066259412Sluigi	if (buf_len < 14) {
1067259412Sluigi		D("invalid buf length %d", buf_len);
1068259412Sluigi		return NM_BDG_NOPORT;
1069259412Sluigi	}
1070259412Sluigi	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1071259412Sluigi	smac = le64toh(*(uint64_t *)(buf + 4));
1072259412Sluigi	smac >>= 16;
1073259412Sluigi
1074259412Sluigi	/*
1075259412Sluigi	 * The hash is somewhat expensive, there might be some
1076259412Sluigi	 * worthwhile optimizations here.
1077259412Sluigi	 */
1078259412Sluigi	if ((buf[6] & 1) == 0) { /* valid src */
1079259412Sluigi		uint8_t *s = buf+6;
1080259412Sluigi		sh = nm_bridge_rthash(s); // XXX hash of source
1081259412Sluigi		/* update source port forwarding entry */
1082259412Sluigi		ht[sh].mac = smac;	/* XXX expire ? */
1083259412Sluigi		ht[sh].ports = mysrc;
1084259412Sluigi		if (netmap_verbose)
1085259412Sluigi		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1086259412Sluigi			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1087259412Sluigi	}
1088259412Sluigi	dst = NM_BDG_BROADCAST;
1089259412Sluigi	if ((buf[0] & 1) == 0) { /* unicast */
1090259412Sluigi		dh = nm_bridge_rthash(buf); // XXX hash of dst
1091259412Sluigi		if (ht[dh].mac == dmac) {	/* found dst */
1092259412Sluigi			dst = ht[dh].ports;
1093259412Sluigi		}
1094259412Sluigi		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1095259412Sluigi	}
1096259412Sluigi	*dst_ring = 0;
1097259412Sluigi	return dst;
1098259412Sluigi}
1099259412Sluigi
1100259412Sluigi
1101259412Sluigi/*
1102260368Sluigi * Available space in the ring. Only used in VALE code
1103260368Sluigi * and only with is_rx = 1
1104260368Sluigi */
1105260368Sluigistatic inline uint32_t
1106260368Sluiginm_kr_space(struct netmap_kring *k, int is_rx)
1107260368Sluigi{
1108260368Sluigi	int space;
1109260368Sluigi
1110260368Sluigi	if (is_rx) {
1111260368Sluigi		int busy = k->nkr_hwlease - k->nr_hwcur;
1112260368Sluigi		if (busy < 0)
1113260368Sluigi			busy += k->nkr_num_slots;
1114260368Sluigi		space = k->nkr_num_slots - 1 - busy;
1115260368Sluigi	} else {
1116260368Sluigi		/* XXX never used in this branch */
1117260368Sluigi		space = k->nr_hwtail - k->nkr_hwlease;
1118260368Sluigi		if (space < 0)
1119260368Sluigi			space += k->nkr_num_slots;
1120260368Sluigi	}
1121260368Sluigi#if 0
1122260368Sluigi	// sanity check
1123260368Sluigi	if (k->nkr_hwlease >= k->nkr_num_slots ||
1124260368Sluigi		k->nr_hwcur >= k->nkr_num_slots ||
1125260368Sluigi		k->nr_tail >= k->nkr_num_slots ||
1126260368Sluigi		busy < 0 ||
1127260368Sluigi		busy >= k->nkr_num_slots) {
1128260368Sluigi		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
1129260368Sluigi			k->nkr_lease_idx, k->nkr_num_slots);
1130260368Sluigi	}
1131260368Sluigi#endif
1132260368Sluigi	return space;
1133260368Sluigi}
1134260368Sluigi
1135260368Sluigi
1136260368Sluigi
1137260368Sluigi
1138260368Sluigi/* make a lease on the kring for N positions. return the
1139260368Sluigi * lease index
1140260368Sluigi * XXX only used in VALE code and with is_rx = 1
1141260368Sluigi */
1142260368Sluigistatic inline uint32_t
1143260368Sluiginm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
1144260368Sluigi{
1145260368Sluigi	uint32_t lim = k->nkr_num_slots - 1;
1146260368Sluigi	uint32_t lease_idx = k->nkr_lease_idx;
1147260368Sluigi
1148260368Sluigi	k->nkr_leases[lease_idx] = NR_NOSLOT;
1149260368Sluigi	k->nkr_lease_idx = nm_next(lease_idx, lim);
1150260368Sluigi
1151260368Sluigi	if (n > nm_kr_space(k, is_rx)) {
1152260368Sluigi		D("invalid request for %d slots", n);
1153260368Sluigi		panic("x");
1154260368Sluigi	}
1155260368Sluigi	/* XXX verify that there are n slots */
1156260368Sluigi	k->nkr_hwlease += n;
1157260368Sluigi	if (k->nkr_hwlease > lim)
1158260368Sluigi		k->nkr_hwlease -= lim + 1;
1159260368Sluigi
1160260368Sluigi	if (k->nkr_hwlease >= k->nkr_num_slots ||
1161260368Sluigi		k->nr_hwcur >= k->nkr_num_slots ||
1162260368Sluigi		k->nr_hwtail >= k->nkr_num_slots ||
1163260368Sluigi		k->nkr_lease_idx >= k->nkr_num_slots) {
1164260368Sluigi		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
1165260368Sluigi			k->na->ifp->if_xname,
1166260368Sluigi			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
1167260368Sluigi			k->nkr_lease_idx, k->nkr_num_slots);
1168260368Sluigi	}
1169260368Sluigi	return lease_idx;
1170260368Sluigi}
1171260368Sluigi
1172260368Sluigi/*
1173259412Sluigi * This flush routine supports only unicast and broadcast but a large
1174259412Sluigi * number of ports, and lets us replace the learn and dispatch functions.
1175259412Sluigi */
1176259412Sluigiint
1177259412Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1178259412Sluigi		u_int ring_nr)
1179259412Sluigi{
1180259412Sluigi	struct nm_bdg_q *dst_ents, *brddst;
1181259412Sluigi	uint16_t num_dsts = 0, *dsts;
1182259412Sluigi	struct nm_bridge *b = na->na_bdg;
1183259412Sluigi	u_int i, j, me = na->bdg_port;
1184259412Sluigi
1185259412Sluigi	/*
1186259412Sluigi	 * The work area (pointed by ft) is followed by an array of
1187259412Sluigi	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1188259412Sluigi	 * queues per port plus one for the broadcast traffic.
1189259412Sluigi	 * Then we have an array of destination indexes.
1190259412Sluigi	 */
1191259412Sluigi	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1192259412Sluigi	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1193259412Sluigi
1194259412Sluigi	/* first pass: find a destination for each packet in the batch */
1195259412Sluigi	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1196259412Sluigi		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1197259412Sluigi		uint16_t dst_port, d_i;
1198259412Sluigi		struct nm_bdg_q *d;
1199259412Sluigi		uint8_t *buf = ft[i].ft_buf;
1200259412Sluigi		u_int len = ft[i].ft_len;
1201259412Sluigi
1202259412Sluigi		ND("slot %d frags %d", i, ft[i].ft_frags);
1203261909Sluigi		/* Drop the packet if the virtio-net header is not into the first
1204259412Sluigi		   fragment nor at the very beginning of the second. */
1205261909Sluigi		if (unlikely(na->virt_hdr_len > len))
1206259412Sluigi			continue;
1207261909Sluigi		if (len == na->virt_hdr_len) {
1208259412Sluigi			buf = ft[i+1].ft_buf;
1209259412Sluigi			len = ft[i+1].ft_len;
1210259412Sluigi		} else {
1211261909Sluigi			buf += na->virt_hdr_len;
1212261909Sluigi			len -= na->virt_hdr_len;
1213259412Sluigi		}
1214259412Sluigi		dst_port = b->nm_bdg_lookup(buf, len, &dst_ring, na);
1215259412Sluigi		if (netmap_verbose > 255)
1216259412Sluigi			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1217259412Sluigi		if (dst_port == NM_BDG_NOPORT)
1218259412Sluigi			continue; /* this packet is identified to be dropped */
1219259412Sluigi		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1220259412Sluigi			continue;
1221259412Sluigi		else if (dst_port == NM_BDG_BROADCAST)
1222259412Sluigi			dst_ring = 0; /* broadcasts always go to ring 0 */
1223259412Sluigi		else if (unlikely(dst_port == me ||
1224259412Sluigi		    !b->bdg_ports[dst_port]))
1225259412Sluigi			continue;
1226259412Sluigi
1227259412Sluigi		/* get a position in the scratch pad */
1228259412Sluigi		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1229259412Sluigi		d = dst_ents + d_i;
1230259412Sluigi
1231259412Sluigi		/* append the first fragment to the list */
1232259412Sluigi		if (d->bq_head == NM_FT_NULL) { /* new destination */
1233259412Sluigi			d->bq_head = d->bq_tail = i;
1234259412Sluigi			/* remember this position to be scanned later */
1235259412Sluigi			if (dst_port != NM_BDG_BROADCAST)
1236259412Sluigi				dsts[num_dsts++] = d_i;
1237259412Sluigi		} else {
1238259412Sluigi			ft[d->bq_tail].ft_next = i;
1239259412Sluigi			d->bq_tail = i;
1240259412Sluigi		}
1241259412Sluigi		d->bq_len += ft[i].ft_frags;
1242259412Sluigi	}
1243259412Sluigi
1244259412Sluigi	/*
1245259412Sluigi	 * Broadcast traffic goes to ring 0 on all destinations.
1246259412Sluigi	 * So we need to add these rings to the list of ports to scan.
1247259412Sluigi	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1248259412Sluigi	 * expensive. We should keep a compact list of active destinations
1249259412Sluigi	 * so we could shorten this loop.
1250259412Sluigi	 */
1251259412Sluigi	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1252259412Sluigi	if (brddst->bq_head != NM_FT_NULL) {
1253259412Sluigi		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1254259412Sluigi			uint16_t d_i;
1255259412Sluigi			i = b->bdg_port_index[j];
1256259412Sluigi			if (unlikely(i == me))
1257259412Sluigi				continue;
1258259412Sluigi			d_i = i * NM_BDG_MAXRINGS;
1259259412Sluigi			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1260259412Sluigi				dsts[num_dsts++] = d_i;
1261259412Sluigi		}
1262259412Sluigi	}
1263259412Sluigi
1264259412Sluigi	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1265259412Sluigi	/* second pass: scan destinations (XXX will be modular somehow) */
1266259412Sluigi	for (i = 0; i < num_dsts; i++) {
1267259412Sluigi		struct ifnet *dst_ifp;
1268259412Sluigi		struct netmap_vp_adapter *dst_na;
1269259412Sluigi		struct netmap_kring *kring;
1270259412Sluigi		struct netmap_ring *ring;
1271261909Sluigi		u_int dst_nr, lim, j, d_i, next, brd_next;
1272259412Sluigi		u_int needed, howmany;
1273259412Sluigi		int retry = netmap_txsync_retry;
1274259412Sluigi		struct nm_bdg_q *d;
1275259412Sluigi		uint32_t my_start = 0, lease_idx = 0;
1276259412Sluigi		int nrings;
1277261909Sluigi		int virt_hdr_mismatch = 0;
1278259412Sluigi
1279259412Sluigi		d_i = dsts[i];
1280259412Sluigi		ND("second pass %d port %d", i, d_i);
1281259412Sluigi		d = dst_ents + d_i;
1282259412Sluigi		// XXX fix the division
1283259412Sluigi		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1284259412Sluigi		/* protect from the lookup function returning an inactive
1285259412Sluigi		 * destination port
1286259412Sluigi		 */
1287259412Sluigi		if (unlikely(dst_na == NULL))
1288259412Sluigi			goto cleanup;
1289259412Sluigi		if (dst_na->up.na_flags & NAF_SW_ONLY)
1290259412Sluigi			goto cleanup;
1291259412Sluigi		dst_ifp = dst_na->up.ifp;
1292259412Sluigi		/*
1293259412Sluigi		 * The interface may be in !netmap mode in two cases:
1294259412Sluigi		 * - when na is attached but not activated yet;
1295259412Sluigi		 * - when na is being deactivated but is still attached.
1296259412Sluigi		 */
1297259412Sluigi		if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
1298259412Sluigi			ND("not in netmap mode!");
1299259412Sluigi			goto cleanup;
1300259412Sluigi		}
1301259412Sluigi
1302259412Sluigi		/* there is at least one either unicast or broadcast packet */
1303259412Sluigi		brd_next = brddst->bq_head;
1304259412Sluigi		next = d->bq_head;
1305259412Sluigi		/* we need to reserve this many slots. If fewer are
1306259412Sluigi		 * available, some packets will be dropped.
1307259412Sluigi		 * Packets may have multiple fragments, so we may not use
1308259412Sluigi		 * there is a chance that we may not use all of the slots
1309259412Sluigi		 * we have claimed, so we will need to handle the leftover
1310259412Sluigi		 * ones when we regain the lock.
1311259412Sluigi		 */
1312259412Sluigi		needed = d->bq_len + brddst->bq_len;
1313259412Sluigi
1314261909Sluigi		if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
1315261909Sluigi			/* There is a virtio-net header/offloadings mismatch between
1316261909Sluigi			 * source and destination. The slower mismatch datapath will
1317261909Sluigi			 * be used to cope with all the mismatches.
1318261909Sluigi			 */
1319261909Sluigi			virt_hdr_mismatch = 1;
1320261909Sluigi			if (dst_na->mfs < na->mfs) {
1321261909Sluigi				/* We may need to do segmentation offloadings, and so
1322261909Sluigi				 * we may need a number of destination slots greater
1323261909Sluigi				 * than the number of input slots ('needed').
1324261909Sluigi				 * We look for the smallest integer 'x' which satisfies:
1325261909Sluigi				 *	needed * na->mfs + x * H <= x * na->mfs
1326261909Sluigi				 * where 'H' is the length of the longest header that may
1327261909Sluigi				 * be replicated in the segmentation process (e.g. for
1328261909Sluigi				 * TCPv4 we must account for ethernet header, IP header
1329261909Sluigi				 * and TCPv4 header).
1330261909Sluigi				 */
1331261909Sluigi				needed = (needed * na->mfs) /
1332261909Sluigi						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
1333261909Sluigi				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
1334261909Sluigi			}
1335261909Sluigi		}
1336261909Sluigi
1337259412Sluigi		ND(5, "pass 2 dst %d is %x %s",
1338259412Sluigi			i, d_i, is_vp ? "virtual" : "nic/host");
1339259412Sluigi		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1340259412Sluigi		nrings = dst_na->up.num_rx_rings;
1341259412Sluigi		if (dst_nr >= nrings)
1342259412Sluigi			dst_nr = dst_nr % nrings;
1343259412Sluigi		kring = &dst_na->up.rx_rings[dst_nr];
1344259412Sluigi		ring = kring->ring;
1345259412Sluigi		lim = kring->nkr_num_slots - 1;
1346259412Sluigi
1347259412Sluigiretry:
1348259412Sluigi
1349261909Sluigi		if (dst_na->retry && retry) {
1350261909Sluigi			/* try to get some free slot from the previous run */
1351261909Sluigi			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1352261909Sluigi		}
1353259412Sluigi		/* reserve the buffers in the queue and an entry
1354259412Sluigi		 * to report completion, and drop lock.
1355259412Sluigi		 * XXX this might become a helper function.
1356259412Sluigi		 */
1357259412Sluigi		mtx_lock(&kring->q_lock);
1358259412Sluigi		if (kring->nkr_stopped) {
1359259412Sluigi			mtx_unlock(&kring->q_lock);
1360259412Sluigi			goto cleanup;
1361259412Sluigi		}
1362259412Sluigi		my_start = j = kring->nkr_hwlease;
1363259412Sluigi		howmany = nm_kr_space(kring, 1);
1364259412Sluigi		if (needed < howmany)
1365259412Sluigi			howmany = needed;
1366259412Sluigi		lease_idx = nm_kr_lease(kring, howmany, 1);
1367259412Sluigi		mtx_unlock(&kring->q_lock);
1368259412Sluigi
1369259412Sluigi		/* only retry if we need more than available slots */
1370259412Sluigi		if (retry && needed <= howmany)
1371259412Sluigi			retry = 0;
1372259412Sluigi
1373259412Sluigi		/* copy to the destination queue */
1374259412Sluigi		while (howmany > 0) {
1375259412Sluigi			struct netmap_slot *slot;
1376259412Sluigi			struct nm_bdg_fwd *ft_p, *ft_end;
1377259412Sluigi			u_int cnt;
1378259412Sluigi
1379259412Sluigi			/* find the queue from which we pick next packet.
1380259412Sluigi			 * NM_FT_NULL is always higher than valid indexes
1381259412Sluigi			 * so we never dereference it if the other list
1382259412Sluigi			 * has packets (and if both are empty we never
1383259412Sluigi			 * get here).
1384259412Sluigi			 */
1385259412Sluigi			if (next < brd_next) {
1386259412Sluigi				ft_p = ft + next;
1387259412Sluigi				next = ft_p->ft_next;
1388259412Sluigi			} else { /* insert broadcast */
1389259412Sluigi				ft_p = ft + brd_next;
1390259412Sluigi				brd_next = ft_p->ft_next;
1391259412Sluigi			}
1392259412Sluigi			cnt = ft_p->ft_frags; // cnt > 0
1393259412Sluigi			if (unlikely(cnt > howmany))
1394259412Sluigi			    break; /* no more space */
1395259412Sluigi			if (netmap_verbose && cnt > 1)
1396259412Sluigi				RD(5, "rx %d frags to %d", cnt, j);
1397259412Sluigi			ft_end = ft_p + cnt;
1398261909Sluigi			if (unlikely(virt_hdr_mismatch)) {
1399261909Sluigi				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
1400261909Sluigi			} else {
1401261909Sluigi				howmany -= cnt;
1402261909Sluigi				do {
1403261909Sluigi					char *dst, *src = ft_p->ft_buf;
1404261909Sluigi					size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1405259412Sluigi
1406261909Sluigi					slot = &ring->slot[j];
1407261909Sluigi					dst = BDG_NMB(&dst_na->up, slot);
1408259412Sluigi
1409261909Sluigi					ND("send [%d] %d(%d) bytes at %s:%d",
1410261909Sluigi							i, (int)copy_len, (int)dst_len,
1411261909Sluigi							NM_IFPNAME(dst_ifp), j);
1412261909Sluigi					/* round to a multiple of 64 */
1413261909Sluigi					copy_len = (copy_len + 63) & ~63;
1414260368Sluigi
1415261909Sluigi					if (ft_p->ft_flags & NS_INDIRECT) {
1416261909Sluigi						if (copyin(src, dst, copy_len)) {
1417261909Sluigi							// invalid user pointer, pretend len is 0
1418261909Sluigi							dst_len = 0;
1419261909Sluigi						}
1420261909Sluigi					} else {
1421261909Sluigi						//memcpy(dst, src, copy_len);
1422261909Sluigi						pkt_copy(src, dst, (int)copy_len);
1423261909Sluigi					}
1424261909Sluigi					slot->len = dst_len;
1425261909Sluigi					slot->flags = (cnt << 8)| NS_MOREFRAG;
1426261909Sluigi					j = nm_next(j, lim);
1427261909Sluigi					needed--;
1428261909Sluigi					ft_p++;
1429261909Sluigi				} while (ft_p != ft_end);
1430261909Sluigi				slot->flags = (cnt << 8); /* clear flag on last entry */
1431261909Sluigi			}
1432259412Sluigi			/* are we done ? */
1433259412Sluigi			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1434259412Sluigi				break;
1435259412Sluigi		}
1436259412Sluigi		{
1437259412Sluigi		    /* current position */
1438259412Sluigi		    uint32_t *p = kring->nkr_leases; /* shorthand */
1439259412Sluigi		    uint32_t update_pos;
1440259412Sluigi		    int still_locked = 1;
1441259412Sluigi
1442259412Sluigi		    mtx_lock(&kring->q_lock);
1443259412Sluigi		    if (unlikely(howmany > 0)) {
1444259412Sluigi			/* not used all bufs. If i am the last one
1445259412Sluigi			 * i can recover the slots, otherwise must
1446259412Sluigi			 * fill them with 0 to mark empty packets.
1447259412Sluigi			 */
1448259412Sluigi			ND("leftover %d bufs", howmany);
1449259412Sluigi			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1450259412Sluigi			    /* yes i am the last one */
1451259412Sluigi			    ND("roll back nkr_hwlease to %d", j);
1452259412Sluigi			    kring->nkr_hwlease = j;
1453259412Sluigi			} else {
1454259412Sluigi			    while (howmany-- > 0) {
1455259412Sluigi				ring->slot[j].len = 0;
1456259412Sluigi				ring->slot[j].flags = 0;
1457259412Sluigi				j = nm_next(j, lim);
1458259412Sluigi			    }
1459259412Sluigi			}
1460259412Sluigi		    }
1461259412Sluigi		    p[lease_idx] = j; /* report I am done */
1462259412Sluigi
1463260368Sluigi		    update_pos = kring->nr_hwtail;
1464259412Sluigi
1465259412Sluigi		    if (my_start == update_pos) {
1466259412Sluigi			/* all slots before my_start have been reported,
1467259412Sluigi			 * so scan subsequent leases to see if other ranges
1468259412Sluigi			 * have been completed, and to a selwakeup or txsync.
1469259412Sluigi		         */
1470259412Sluigi			while (lease_idx != kring->nkr_lease_idx &&
1471259412Sluigi				p[lease_idx] != NR_NOSLOT) {
1472259412Sluigi			    j = p[lease_idx];
1473259412Sluigi			    p[lease_idx] = NR_NOSLOT;
1474259412Sluigi			    lease_idx = nm_next(lease_idx, lim);
1475259412Sluigi			}
1476259412Sluigi			/* j is the new 'write' position. j != my_start
1477259412Sluigi			 * means there are new buffers to report
1478259412Sluigi			 */
1479259412Sluigi			if (likely(j != my_start)) {
1480260368Sluigi				kring->nr_hwtail = j;
1481259412Sluigi				still_locked = 0;
1482259412Sluigi				mtx_unlock(&kring->q_lock);
1483261909Sluigi				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1484259412Sluigi				if (dst_na->retry && retry--)
1485259412Sluigi					goto retry;
1486259412Sluigi			}
1487259412Sluigi		    }
1488259412Sluigi		    if (still_locked)
1489259412Sluigi			mtx_unlock(&kring->q_lock);
1490259412Sluigi		}
1491259412Sluigicleanup:
1492259412Sluigi		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1493259412Sluigi		d->bq_len = 0;
1494259412Sluigi	}
1495259412Sluigi	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1496259412Sluigi	brddst->bq_len = 0;
1497259412Sluigi	return 0;
1498259412Sluigi}
1499259412Sluigi
1500260368Sluigi
1501259412Sluigistatic int
1502259412Sluiginetmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
1503259412Sluigi{
1504259412Sluigi	struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
1505260368Sluigi	u_int done;
1506260368Sluigi	u_int const lim = kring->nkr_num_slots - 1;
1507260368Sluigi	u_int const cur = kring->rcur;
1508259412Sluigi
1509259412Sluigi	if (bridge_batch <= 0) { /* testing only */
1510260368Sluigi		done = cur; // used all
1511259412Sluigi		goto done;
1512259412Sluigi	}
1513259412Sluigi	if (bridge_batch > NM_BDG_BATCH)
1514259412Sluigi		bridge_batch = NM_BDG_BATCH;
1515259412Sluigi
1516260368Sluigi	done = nm_bdg_preflush(na, ring_nr, kring, cur);
1517259412Sluigidone:
1518260368Sluigi	if (done != cur)
1519260368Sluigi		D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail);
1520260368Sluigi	/*
1521260368Sluigi	 * packets between 'done' and 'cur' are left unsent.
1522260368Sluigi	 */
1523260368Sluigi	kring->nr_hwcur = done;
1524260368Sluigi	kring->nr_hwtail = nm_prev(done, lim);
1525260368Sluigi	nm_txsync_finalize(kring);
1526259412Sluigi	if (netmap_verbose)
1527259412Sluigi		D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
1528259412Sluigi	return 0;
1529259412Sluigi}
1530259412Sluigi
1531259412Sluigi
1532259412Sluigi/*
1533259412Sluigi * main dispatch routine for the bridge.
1534259412Sluigi * We already know that only one thread is running this.
1535259412Sluigi * we must run nm_bdg_preflush without lock.
1536259412Sluigi */
1537259412Sluigistatic int
1538259412Sluigibdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1539259412Sluigi{
1540259412Sluigi	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
1541259412Sluigi	return netmap_vp_txsync(vpna, ring_nr, flags);
1542259412Sluigi}
1543259412Sluigi
1544259412Sluigistatic int
1545260368Sluiginetmap_vp_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1546259412Sluigi{
1547259412Sluigi	struct netmap_kring *kring = &na->rx_rings[ring_nr];
1548259412Sluigi	struct netmap_ring *ring = kring->ring;
1549260368Sluigi	u_int nm_i, lim = kring->nkr_num_slots - 1;
1550260368Sluigi	u_int head = nm_rxsync_prologue(kring);
1551259412Sluigi	int n;
1552259412Sluigi
1553260368Sluigi	if (head > lim) {
1554259412Sluigi		D("ouch dangerous reset!!!");
1555259412Sluigi		n = netmap_ring_reinit(kring);
1556259412Sluigi		goto done;
1557259412Sluigi	}
1558259412Sluigi
1559260368Sluigi	/* First part, import newly received packets. */
1560260368Sluigi	/* actually nothing to do here, they are already in the kring */
1561259412Sluigi
1562260368Sluigi	/* Second part, skip past packets that userspace has released. */
1563260368Sluigi	nm_i = kring->nr_hwcur;
1564260368Sluigi	if (nm_i != head) {
1565260368Sluigi		/* consistency check, but nothing really important here */
1566260368Sluigi		for (n = 0; likely(nm_i != head); n++) {
1567260368Sluigi			struct netmap_slot *slot = &ring->slot[nm_i];
1568259412Sluigi			void *addr = BDG_NMB(na, slot);
1569259412Sluigi
1570259412Sluigi			if (addr == netmap_buffer_base) { /* bad buf */
1571259412Sluigi				D("bad buffer index %d, ignore ?",
1572259412Sluigi					slot->buf_idx);
1573259412Sluigi			}
1574259412Sluigi			slot->flags &= ~NS_BUF_CHANGED;
1575260368Sluigi			nm_i = nm_next(nm_i, lim);
1576259412Sluigi		}
1577260368Sluigi		kring->nr_hwcur = head;
1578259412Sluigi	}
1579260368Sluigi
1580259412Sluigi	/* tell userspace that there are new packets */
1581260368Sluigi	nm_rxsync_finalize(kring);
1582259412Sluigi	n = 0;
1583259412Sluigidone:
1584260368Sluigi	return n;
1585260368Sluigi}
1586260368Sluigi
1587260368Sluigi/*
1588260368Sluigi * user process reading from a VALE switch.
1589260368Sluigi * Already protected against concurrent calls from userspace,
1590260368Sluigi * but we must acquire the queue's lock to protect against
1591260368Sluigi * writers on the same queue.
1592260368Sluigi */
1593260368Sluigistatic int
1594260368Sluigibdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1595260368Sluigi{
1596260368Sluigi	struct netmap_kring *kring = &na->rx_rings[ring_nr];
1597260368Sluigi	int n;
1598260368Sluigi
1599260368Sluigi	mtx_lock(&kring->q_lock);
1600260368Sluigi	n = netmap_vp_rxsync(na, ring_nr, flags);
1601259412Sluigi	mtx_unlock(&kring->q_lock);
1602259412Sluigi	return n;
1603259412Sluigi}
1604259412Sluigi
1605260368Sluigi
1606259412Sluigistatic int
1607259412Sluigibdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp)
1608259412Sluigi{
1609259412Sluigi	struct netmap_vp_adapter *vpna;
1610259412Sluigi	struct netmap_adapter *na;
1611259412Sluigi	int error;
1612261909Sluigi	u_int npipes = 0;
1613259412Sluigi
1614259412Sluigi	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1615259412Sluigi	if (vpna == NULL)
1616259412Sluigi		return ENOMEM;
1617259412Sluigi
1618259412Sluigi 	na = &vpna->up;
1619259412Sluigi
1620259412Sluigi	na->ifp = ifp;
1621259412Sluigi
1622259412Sluigi	/* bound checking */
1623259412Sluigi	na->num_tx_rings = nmr->nr_tx_rings;
1624259412Sluigi	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1625259412Sluigi	nmr->nr_tx_rings = na->num_tx_rings; // write back
1626259412Sluigi	na->num_rx_rings = nmr->nr_rx_rings;
1627259412Sluigi	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1628259412Sluigi	nmr->nr_rx_rings = na->num_rx_rings; // write back
1629259412Sluigi	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1630259412Sluigi			1, NM_BDG_MAXSLOTS, NULL);
1631259412Sluigi	na->num_tx_desc = nmr->nr_tx_slots;
1632259412Sluigi	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1633259412Sluigi			1, NM_BDG_MAXSLOTS, NULL);
1634261909Sluigi	/* validate number of pipes. We want at least 1,
1635261909Sluigi	 * but probably can do with some more.
1636261909Sluigi	 * So let's use 2 as default (when 0 is supplied)
1637261909Sluigi	 */
1638261909Sluigi	npipes = nmr->nr_arg1;
1639261909Sluigi	nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
1640261909Sluigi	nmr->nr_arg1 = npipes;	/* write back */
1641261909Sluigi	/* validate extra bufs */
1642261909Sluigi	nm_bound_var(&nmr->nr_arg3, 0, 0,
1643261909Sluigi			128*NM_BDG_MAXSLOTS, NULL);
1644259412Sluigi	na->num_rx_desc = nmr->nr_rx_slots;
1645261909Sluigi	vpna->virt_hdr_len = 0;
1646261909Sluigi	vpna->mfs = 1514;
1647261909Sluigi	/*if (vpna->mfs > netmap_buf_size)  TODO netmap_buf_size is zero??
1648261909Sluigi		vpna->mfs = netmap_buf_size; */
1649261909Sluigi        if (netmap_verbose)
1650261909Sluigi		D("max frame size %u", vpna->mfs);
1651259412Sluigi
1652259412Sluigi	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
1653259412Sluigi	na->nm_txsync = bdg_netmap_txsync;
1654259412Sluigi	na->nm_rxsync = bdg_netmap_rxsync;
1655259412Sluigi	na->nm_register = bdg_netmap_reg;
1656259412Sluigi	na->nm_dtor = netmap_adapter_vp_dtor;
1657259412Sluigi	na->nm_krings_create = netmap_vp_krings_create;
1658259412Sluigi	na->nm_krings_delete = netmap_vp_krings_delete;
1659259412Sluigi	na->nm_mem = netmap_mem_private_new(NM_IFPNAME(na->ifp),
1660259412Sluigi			na->num_tx_rings, na->num_tx_desc,
1661261909Sluigi			na->num_rx_rings, na->num_rx_desc,
1662261909Sluigi			nmr->nr_arg3, npipes, &error);
1663261909Sluigi	if (na->nm_mem == NULL)
1664261909Sluigi		goto err;
1665259412Sluigi	/* other nmd fields are set in the common routine */
1666259412Sluigi	error = netmap_attach_common(na);
1667261909Sluigi	if (error)
1668261909Sluigi		goto err;
1669259412Sluigi	return 0;
1670261909Sluigi
1671261909Sluigierr:
1672261909Sluigi	if (na->nm_mem != NULL)
1673261909Sluigi		netmap_mem_private_delete(na->nm_mem);
1674261909Sluigi	free(vpna, M_DEVBUF);
1675261909Sluigi	return error;
1676259412Sluigi}
1677259412Sluigi
1678260368Sluigi
1679259412Sluigistatic void
1680259412Sluiginetmap_bwrap_dtor(struct netmap_adapter *na)
1681259412Sluigi{
1682259412Sluigi	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1683259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
1684259412Sluigi	struct nm_bridge *b = bna->up.na_bdg,
1685259412Sluigi		*bh = bna->host.na_bdg;
1686259412Sluigi	struct ifnet *ifp = na->ifp;
1687259412Sluigi
1688259412Sluigi	ND("na %p", na);
1689259412Sluigi
1690259412Sluigi	if (b) {
1691259412Sluigi		netmap_bdg_detach_common(b, bna->up.bdg_port,
1692259412Sluigi			(bh ? bna->host.bdg_port : -1));
1693259412Sluigi	}
1694259412Sluigi
1695259412Sluigi	hwna->na_private = NULL;
1696259412Sluigi	netmap_adapter_put(hwna);
1697259412Sluigi
1698259412Sluigi	bzero(ifp, sizeof(*ifp));
1699259412Sluigi	free(ifp, M_DEVBUF);
1700259412Sluigi	na->ifp = NULL;
1701259412Sluigi
1702259412Sluigi}
1703259412Sluigi
1704260368Sluigi
1705259412Sluigi/*
1706260368Sluigi * Intr callback for NICs connected to a bridge.
1707260368Sluigi * Simply ignore tx interrupts (maybe we could try to recover space ?)
1708260368Sluigi * and pass received packets from nic to the bridge.
1709260368Sluigi *
1710259412Sluigi * XXX TODO check locking: this is called from the interrupt
1711259412Sluigi * handler so we should make sure that the interface is not
1712259412Sluigi * disconnected while passing down an interrupt.
1713259412Sluigi *
1714260368Sluigi * Note, no user process can access this NIC or the host stack.
1715260368Sluigi * The only part of the ring that is significant are the slots,
1716260368Sluigi * and head/cur/tail are set from the kring as needed
1717260368Sluigi * (part as a receive ring, part as a transmit ring).
1718260368Sluigi *
1719260368Sluigi * callback that overwrites the hwna notify callback.
1720259412Sluigi * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1721259412Sluigi * The bridge wrapper then sends the packets through the bridge.
1722259412Sluigi */
1723259412Sluigistatic int
1724259412Sluiginetmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
1725259412Sluigi{
1726259412Sluigi	struct ifnet *ifp = na->ifp;
1727259412Sluigi	struct netmap_bwrap_adapter *bna = na->na_private;
1728259412Sluigi	struct netmap_vp_adapter *hostna = &bna->host;
1729259412Sluigi	struct netmap_kring *kring, *bkring;
1730259412Sluigi	struct netmap_ring *ring;
1731259412Sluigi	int is_host_ring = ring_nr == na->num_rx_rings;
1732259412Sluigi	struct netmap_vp_adapter *vpna = &bna->up;
1733259412Sluigi	int error = 0;
1734259412Sluigi
1735260368Sluigi	if (netmap_verbose)
1736260368Sluigi	    D("%s %s%d 0x%x", NM_IFPNAME(ifp),
1737260368Sluigi		(tx == NR_TX ? "TX" : "RX"), ring_nr, flags);
1738259412Sluigi
1739259412Sluigi	if (flags & NAF_DISABLE_NOTIFY) {
1740259412Sluigi		kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
1741259412Sluigi		bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
1742260368Sluigi		if (kring[ring_nr].nkr_stopped)
1743260368Sluigi			netmap_disable_ring(&bkring[ring_nr]);
1744259412Sluigi		else
1745260368Sluigi			bkring[ring_nr].nkr_stopped = 0;
1746259412Sluigi		return 0;
1747259412Sluigi	}
1748259412Sluigi
1749259412Sluigi	if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
1750259412Sluigi		return 0;
1751259412Sluigi
1752260368Sluigi	/* we only care about receive interrupts */
1753259412Sluigi	if (tx == NR_TX)
1754259412Sluigi		return 0;
1755259412Sluigi
1756259412Sluigi	kring = &na->rx_rings[ring_nr];
1757259412Sluigi	ring = kring->ring;
1758259412Sluigi
1759259412Sluigi	/* make sure the ring is not disabled */
1760259412Sluigi	if (nm_kr_tryget(kring))
1761259412Sluigi		return 0;
1762259412Sluigi
1763259412Sluigi	if (is_host_ring && hostna->na_bdg == NULL) {
1764259412Sluigi		error = bna->save_notify(na, ring_nr, tx, flags);
1765259412Sluigi		goto put_out;
1766259412Sluigi	}
1767259412Sluigi
1768260368Sluigi	/* Here we expect ring->head = ring->cur = ring->tail
1769260368Sluigi	 * because everything has been released from the previous round.
1770260368Sluigi	 * However the ring is shared and we might have info from
1771260368Sluigi	 * the wrong side (the tx ring). Hence we overwrite with
1772260368Sluigi	 * the info from the rx kring.
1773260368Sluigi	 */
1774260368Sluigi	if (netmap_verbose)
1775260368Sluigi	    D("%s head %d cur %d tail %d (kring %d %d %d)",  NM_IFPNAME(ifp),
1776260368Sluigi		ring->head, ring->cur, ring->tail,
1777260368Sluigi		kring->rhead, kring->rcur, kring->rtail);
1778260368Sluigi
1779260368Sluigi	ring->head = kring->rhead;
1780260368Sluigi	ring->cur = kring->rcur;
1781260368Sluigi	ring->tail = kring->rtail;
1782260368Sluigi
1783259412Sluigi	if (is_host_ring) {
1784259412Sluigi		vpna = hostna;
1785259412Sluigi		ring_nr = 0;
1786267128Sluigi	}
1787261909Sluigi	/* simulate a user wakeup on the rx ring */
1788261909Sluigi	/* fetch packets that have arrived.
1789261909Sluigi	 * XXX maybe do this in a loop ?
1790261909Sluigi	 */
1791261909Sluigi	error = kring->nm_sync(kring, 0);
1792261909Sluigi	if (error)
1793261909Sluigi		goto put_out;
1794260368Sluigi	if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
1795259412Sluigi		D("how strange, interrupt with no packets on %s",
1796259412Sluigi			NM_IFPNAME(ifp));
1797259412Sluigi		goto put_out;
1798259412Sluigi	}
1799260368Sluigi
1800260368Sluigi	/* new packets are ring->cur to ring->tail, and the bkring
1801260368Sluigi	 * had hwcur == ring->cur. So advance ring->cur to ring->tail
1802260368Sluigi	 * to push all packets out.
1803260368Sluigi	 */
1804260368Sluigi	ring->head = ring->cur = ring->tail;
1805260368Sluigi
1806260368Sluigi	/* also set tail to what the bwrap expects */
1807260368Sluigi	bkring = &vpna->up.tx_rings[ring_nr];
1808260368Sluigi	ring->tail = bkring->nr_hwtail; // rtail too ?
1809260368Sluigi
1810260368Sluigi	/* pass packets to the switch */
1811260368Sluigi	nm_txsync_prologue(bkring); // XXX error checking ?
1812259412Sluigi	netmap_vp_txsync(vpna, ring_nr, flags);
1813259412Sluigi
1814260368Sluigi	/* mark all buffers as released on this ring */
1815260368Sluigi	ring->head = ring->cur = kring->nr_hwtail;
1816260368Sluigi	ring->tail = kring->rtail;
1817260368Sluigi	/* another call to actually release the buffers */
1818260368Sluigi	if (!is_host_ring) {
1819261909Sluigi		error = kring->nm_sync(kring, 0);
1820260368Sluigi	} else {
1821260368Sluigi		/* mark all packets as released, as in the
1822260368Sluigi		 * second part of netmap_rxsync_from_host()
1823260368Sluigi		 */
1824260368Sluigi		kring->nr_hwcur = kring->nr_hwtail;
1825260368Sluigi		nm_rxsync_finalize(kring);
1826260368Sluigi	}
1827259412Sluigi
1828259412Sluigiput_out:
1829259412Sluigi	nm_kr_put(kring);
1830259412Sluigi	return error;
1831259412Sluigi}
1832259412Sluigi
1833260368Sluigi
1834259412Sluigistatic int
1835259412Sluiginetmap_bwrap_register(struct netmap_adapter *na, int onoff)
1836259412Sluigi{
1837259412Sluigi	struct netmap_bwrap_adapter *bna =
1838259412Sluigi		(struct netmap_bwrap_adapter *)na;
1839259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
1840259412Sluigi	struct netmap_vp_adapter *hostna = &bna->host;
1841259412Sluigi	int error;
1842259412Sluigi
1843260368Sluigi	ND("%s %s", NM_IFPNAME(na->ifp), onoff ? "on" : "off");
1844259412Sluigi
1845259412Sluigi	if (onoff) {
1846259412Sluigi		int i;
1847259412Sluigi
1848259412Sluigi		hwna->na_lut = na->na_lut;
1849259412Sluigi		hwna->na_lut_objtotal = na->na_lut_objtotal;
1850259412Sluigi
1851259412Sluigi		if (hostna->na_bdg) {
1852259412Sluigi			hostna->up.na_lut = na->na_lut;
1853259412Sluigi			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
1854259412Sluigi		}
1855259412Sluigi
1856260516Sluigi		/* cross-link the netmap rings
1857260516Sluigi		 * The original number of rings comes from hwna,
1858260516Sluigi		 * rx rings on one side equals tx rings on the other.
1859260516Sluigi		 */
1860261909Sluigi		for (i = 0; i < na->num_rx_rings + 1; i++) {
1861259412Sluigi			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
1862259412Sluigi			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
1863259412Sluigi		}
1864261909Sluigi		for (i = 0; i < na->num_tx_rings + 1; i++) {
1865259412Sluigi			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
1866259412Sluigi			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
1867259412Sluigi		}
1868259412Sluigi	}
1869259412Sluigi
1870259412Sluigi	if (hwna->ifp) {
1871259412Sluigi		error = hwna->nm_register(hwna, onoff);
1872259412Sluigi		if (error)
1873259412Sluigi			return error;
1874259412Sluigi	}
1875259412Sluigi
1876259412Sluigi	bdg_netmap_reg(na, onoff);
1877259412Sluigi
1878259412Sluigi	if (onoff) {
1879259412Sluigi		bna->save_notify = hwna->nm_notify;
1880259412Sluigi		hwna->nm_notify = netmap_bwrap_intr_notify;
1881259412Sluigi	} else {
1882259412Sluigi		hwna->nm_notify = bna->save_notify;
1883259412Sluigi		hwna->na_lut = NULL;
1884259412Sluigi		hwna->na_lut_objtotal = 0;
1885259412Sluigi	}
1886259412Sluigi
1887259412Sluigi	return 0;
1888259412Sluigi}
1889259412Sluigi
1890260368Sluigi
1891259412Sluigistatic int
1892259412Sluiginetmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
1893259412Sluigi				    u_int *rxr, u_int *rxd)
1894259412Sluigi{
1895259412Sluigi	struct netmap_bwrap_adapter *bna =
1896259412Sluigi		(struct netmap_bwrap_adapter *)na;
1897259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
1898259412Sluigi
1899259412Sluigi	/* forward the request */
1900259412Sluigi	netmap_update_config(hwna);
1901259412Sluigi	/* swap the results */
1902259412Sluigi	*txr = hwna->num_rx_rings;
1903259412Sluigi	*txd = hwna->num_rx_desc;
1904259412Sluigi	*rxr = hwna->num_tx_rings;
1905259412Sluigi	*rxd = hwna->num_rx_desc;
1906259412Sluigi
1907259412Sluigi	return 0;
1908259412Sluigi}
1909259412Sluigi
1910260368Sluigi
1911259412Sluigistatic int
1912259412Sluiginetmap_bwrap_krings_create(struct netmap_adapter *na)
1913259412Sluigi{
1914259412Sluigi	struct netmap_bwrap_adapter *bna =
1915259412Sluigi		(struct netmap_bwrap_adapter *)na;
1916259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
1917259412Sluigi	struct netmap_adapter *hostna = &bna->host.up;
1918259412Sluigi	int error;
1919259412Sluigi
1920259412Sluigi	ND("%s", NM_IFPNAME(na->ifp));
1921259412Sluigi
1922259412Sluigi	error = netmap_vp_krings_create(na);
1923259412Sluigi	if (error)
1924259412Sluigi		return error;
1925259412Sluigi
1926259412Sluigi	error = hwna->nm_krings_create(hwna);
1927259412Sluigi	if (error) {
1928259412Sluigi		netmap_vp_krings_delete(na);
1929259412Sluigi		return error;
1930259412Sluigi	}
1931259412Sluigi
1932261909Sluigi	if (na->na_flags & NAF_HOST_RINGS) {
1933261909Sluigi		hostna->tx_rings = na->tx_rings + na->num_tx_rings;
1934261909Sluigi		hostna->rx_rings = na->rx_rings + na->num_rx_rings;
1935261909Sluigi	}
1936259412Sluigi
1937259412Sluigi	return 0;
1938259412Sluigi}
1939259412Sluigi
1940260368Sluigi
1941259412Sluigistatic void
1942259412Sluiginetmap_bwrap_krings_delete(struct netmap_adapter *na)
1943259412Sluigi{
1944259412Sluigi	struct netmap_bwrap_adapter *bna =
1945259412Sluigi		(struct netmap_bwrap_adapter *)na;
1946259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
1947259412Sluigi
1948259412Sluigi	ND("%s", NM_IFPNAME(na->ifp));
1949259412Sluigi
1950259412Sluigi	hwna->nm_krings_delete(hwna);
1951259412Sluigi	netmap_vp_krings_delete(na);
1952259412Sluigi}
1953259412Sluigi
1954260368Sluigi
1955259412Sluigi/* notify method for the bridge-->hwna direction */
1956259412Sluigistatic int
1957259412Sluiginetmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1958259412Sluigi{
1959259412Sluigi	struct netmap_bwrap_adapter *bna =
1960259412Sluigi		(struct netmap_bwrap_adapter *)na;
1961259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
1962259412Sluigi	struct netmap_kring *kring, *hw_kring;
1963259412Sluigi	struct netmap_ring *ring;
1964260368Sluigi	u_int lim;
1965259412Sluigi	int error = 0;
1966259412Sluigi
1967259412Sluigi	if (tx == NR_TX)
1968260700Sluigi	        return EINVAL;
1969259412Sluigi
1970259412Sluigi	kring = &na->rx_rings[ring_n];
1971259412Sluigi	hw_kring = &hwna->tx_rings[ring_n];
1972259412Sluigi	ring = kring->ring;
1973259412Sluigi	lim = kring->nkr_num_slots - 1;
1974259412Sluigi
1975259412Sluigi	if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
1976259412Sluigi		return 0;
1977261909Sluigi	mtx_lock(&kring->q_lock);
1978260368Sluigi	/* first step: simulate a user wakeup on the rx ring */
1979260368Sluigi	netmap_vp_rxsync(na, ring_n, flags);
1980260368Sluigi	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
1981259412Sluigi		NM_IFPNAME(na->ifp), ring_n,
1982260368Sluigi		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
1983260368Sluigi		ring->head, ring->cur, ring->tail,
1984260368Sluigi		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
1985260368Sluigi	/* second step: the simulated user consumes all new packets */
1986260368Sluigi	ring->head = ring->cur = ring->tail;
1987260368Sluigi
1988260368Sluigi	/* third step: the new packets are sent on the tx ring
1989260368Sluigi	 * (which is actually the same ring)
1990260368Sluigi	 */
1991260368Sluigi	/* set tail to what the hw expects */
1992260368Sluigi	ring->tail = hw_kring->rtail;
1993261909Sluigi	nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
1994261909Sluigi	error = hw_kring->nm_sync(hw_kring, flags);
1995260368Sluigi
1996260368Sluigi	/* fourth step: now we are back the rx ring */
1997260368Sluigi	/* claim ownership on all hw owned bufs */
1998260368Sluigi	ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */
1999260368Sluigi	ring->tail = kring->rtail; /* restore saved value of tail, for safety */
2000260368Sluigi
2001260368Sluigi	/* fifth step: the user goes to sleep again, causing another rxsync */
2002260368Sluigi	netmap_vp_rxsync(na, ring_n, flags);
2003260368Sluigi	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2004259412Sluigi		NM_IFPNAME(na->ifp), ring_n,
2005260368Sluigi		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
2006260368Sluigi		ring->head, ring->cur, ring->tail,
2007260368Sluigi		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
2008261909Sluigi	mtx_unlock(&kring->q_lock);
2009259412Sluigi	return error;
2010259412Sluigi}
2011259412Sluigi
2012260368Sluigi
2013259412Sluigistatic int
2014259412Sluiginetmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
2015259412Sluigi{
2016259412Sluigi	struct netmap_bwrap_adapter *bna = na->na_private;
2017259412Sluigi	struct netmap_adapter *port_na = &bna->up.up;
2018259412Sluigi	if (tx == NR_TX || ring_n != 0)
2019260700Sluigi		return EINVAL;
2020259412Sluigi	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
2021259412Sluigi}
2022259412Sluigi
2023260368Sluigi
2024259412Sluigi/* attach a bridge wrapper to the 'real' device */
2025259412Sluigistatic int
2026259412Sluiginetmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
2027259412Sluigi{
2028259412Sluigi	struct netmap_bwrap_adapter *bna;
2029259412Sluigi	struct netmap_adapter *na;
2030259412Sluigi	struct netmap_adapter *hwna = NA(real);
2031259412Sluigi	struct netmap_adapter *hostna;
2032259412Sluigi	int error;
2033259412Sluigi
2034259412Sluigi
2035259412Sluigi	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
2036259412Sluigi	if (bna == NULL)
2037259412Sluigi		return ENOMEM;
2038259412Sluigi
2039259412Sluigi	na = &bna->up.up;
2040259412Sluigi	na->ifp = fake;
2041259412Sluigi	/* fill the ring data for the bwrap adapter with rx/tx meanings
2042259412Sluigi	 * swapped. The real cross-linking will be done during register,
2043259412Sluigi	 * when all the krings will have been created.
2044259412Sluigi	 */
2045259412Sluigi	na->num_rx_rings = hwna->num_tx_rings;
2046259412Sluigi	na->num_tx_rings = hwna->num_rx_rings;
2047259412Sluigi	na->num_tx_desc = hwna->num_rx_desc;
2048259412Sluigi	na->num_rx_desc = hwna->num_tx_desc;
2049259412Sluigi	na->nm_dtor = netmap_bwrap_dtor;
2050259412Sluigi	na->nm_register = netmap_bwrap_register;
2051259412Sluigi	// na->nm_txsync = netmap_bwrap_txsync;
2052259412Sluigi	// na->nm_rxsync = netmap_bwrap_rxsync;
2053259412Sluigi	na->nm_config = netmap_bwrap_config;
2054259412Sluigi	na->nm_krings_create = netmap_bwrap_krings_create;
2055259412Sluigi	na->nm_krings_delete = netmap_bwrap_krings_delete;
2056259412Sluigi	na->nm_notify = netmap_bwrap_notify;
2057259412Sluigi	na->nm_mem = hwna->nm_mem;
2058259412Sluigi	na->na_private = na; /* prevent NIOCREGIF */
2059259412Sluigi	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
2060259412Sluigi
2061259412Sluigi	bna->hwna = hwna;
2062259412Sluigi	netmap_adapter_get(hwna);
2063259412Sluigi	hwna->na_private = bna; /* weak reference */
2064261909Sluigi
2065261909Sluigi	if (hwna->na_flags & NAF_HOST_RINGS) {
2066261909Sluigi		na->na_flags |= NAF_HOST_RINGS;
2067261909Sluigi		hostna = &bna->host.up;
2068261909Sluigi		hostna->ifp = hwna->ifp;
2069261909Sluigi		hostna->num_tx_rings = 1;
2070261909Sluigi		hostna->num_tx_desc = hwna->num_rx_desc;
2071261909Sluigi		hostna->num_rx_rings = 1;
2072261909Sluigi		hostna->num_rx_desc = hwna->num_tx_desc;
2073261909Sluigi		// hostna->nm_txsync = netmap_bwrap_host_txsync;
2074261909Sluigi		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
2075261909Sluigi		hostna->nm_notify = netmap_bwrap_host_notify;
2076261909Sluigi		hostna->nm_mem = na->nm_mem;
2077261909Sluigi		hostna->na_private = bna;
2078261909Sluigi	}
2079259412Sluigi
2080260368Sluigi	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
2081260368Sluigi		fake->if_xname, real->if_xname,
2082259412Sluigi		na->num_tx_rings, na->num_tx_desc,
2083259412Sluigi		na->num_rx_rings, na->num_rx_desc);
2084259412Sluigi
2085259412Sluigi	error = netmap_attach_common(na);
2086259412Sluigi	if (error) {
2087259412Sluigi		netmap_adapter_put(hwna);
2088259412Sluigi		free(bna, M_DEVBUF);
2089259412Sluigi		return error;
2090259412Sluigi	}
2091259412Sluigi	return 0;
2092259412Sluigi}
2093259412Sluigi
2094260368Sluigi
2095259412Sluigivoid
2096259412Sluiginetmap_init_bridges(void)
2097259412Sluigi{
2098259412Sluigi	int i;
2099259412Sluigi	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
2100259412Sluigi	for (i = 0; i < NM_BRIDGES; i++)
2101259412Sluigi		BDG_RWINIT(&nm_bridges[i]);
2102259412Sluigi}
2103259412Sluigi#endif /* WITH_VALE */
2104