netmap_vale.c revision 259412
1259412Sluigi/*
2259412Sluigi * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
3259412Sluigi *
4259412Sluigi * Redistribution and use in source and binary forms, with or without
5259412Sluigi * modification, are permitted provided that the following conditions
6259412Sluigi * are met:
7259412Sluigi *   1. Redistributions of source code must retain the above copyright
8259412Sluigi *      notice, this list of conditions and the following disclaimer.
9259412Sluigi *   2. Redistributions in binary form must reproduce the above copyright
10259412Sluigi *      notice, this list of conditions and the following disclaimer in the
11259412Sluigi *      documentation and/or other materials provided with the distribution.
12259412Sluigi *
13259412Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14259412Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15259412Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16259412Sluigi * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17259412Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18259412Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19259412Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20259412Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21259412Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22259412Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23259412Sluigi * SUCH DAMAGE.
24259412Sluigi */
25259412Sluigi
26259412Sluigi
27259412Sluigi/*
28259412Sluigi * This module implements the VALE switch for netmap
29259412Sluigi
30259412Sluigi--- VALE SWITCH ---
31259412Sluigi
32259412SluigiNMG_LOCK() serializes all modifications to switches and ports.
33259412SluigiA switch cannot be deleted until all ports are gone.
34259412Sluigi
For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring a new port or deleting an
existing one, the lock is acquired in exclusive mode (after
taking NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.
42259412Sluigi
On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch.)
49259412Sluigi
50259412Sluigi */
51259412Sluigi
52259412Sluigi/*
53259412Sluigi * OS-specific code that is used only within this file.
54259412Sluigi * Other OS-specific code that must be accessed by drivers
55259412Sluigi * is present in netmap_kern.h
56259412Sluigi */
57259412Sluigi
58259412Sluigi#if defined(__FreeBSD__)
59259412Sluigi#include <sys/cdefs.h> /* prerequisite */
60259412Sluigi__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap_vale.c 259412 2013-12-15 08:37:24Z luigi $");
61259412Sluigi
62259412Sluigi#include <sys/types.h>
63259412Sluigi#include <sys/errno.h>
64259412Sluigi#include <sys/param.h>	/* defines used in kernel.h */
65259412Sluigi#include <sys/kernel.h>	/* types used in module initialization */
66259412Sluigi#include <sys/conf.h>	/* cdevsw struct, UID, GID */
67259412Sluigi#include <sys/sockio.h>
68259412Sluigi#include <sys/socketvar.h>	/* struct socket */
69259412Sluigi#include <sys/malloc.h>
70259412Sluigi#include <sys/poll.h>
71259412Sluigi#include <sys/rwlock.h>
72259412Sluigi#include <sys/socket.h> /* sockaddrs */
73259412Sluigi#include <sys/selinfo.h>
74259412Sluigi#include <sys/sysctl.h>
75259412Sluigi#include <net/if.h>
76259412Sluigi#include <net/if_var.h>
77259412Sluigi#include <net/bpf.h>		/* BIOCIMMEDIATE */
78259412Sluigi#include <machine/bus.h>	/* bus_dmamap_* */
79259412Sluigi#include <sys/endian.h>
80259412Sluigi#include <sys/refcount.h>
81259412Sluigi
82259412Sluigi// #define prefetch(x)	__builtin_prefetch(x)
83259412Sluigi
84259412Sluigi
85259412Sluigi#define BDG_RWLOCK_T		struct rwlock // struct rwlock
86259412Sluigi
87259412Sluigi#define	BDG_RWINIT(b)		\
88259412Sluigi	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
89259412Sluigi#define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
90259412Sluigi#define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
91259412Sluigi#define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
92259412Sluigi#define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
93259412Sluigi#define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
94259412Sluigi#define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)
95259412Sluigi
96259412Sluigi
97259412Sluigi#elif defined(linux)
98259412Sluigi
99259412Sluigi#include "bsd_glue.h"
100259412Sluigi
101259412Sluigi#elif defined(__APPLE__)
102259412Sluigi
103259412Sluigi#warning OSX support is only partial
104259412Sluigi#include "osx_glue.h"
105259412Sluigi
106259412Sluigi#else
107259412Sluigi
108259412Sluigi#error	Unsupported platform
109259412Sluigi
110259412Sluigi#endif /* unsupported */
111259412Sluigi
112259412Sluigi/*
113259412Sluigi * common headers
114259412Sluigi */
115259412Sluigi
116259412Sluigi#include <net/netmap.h>
117259412Sluigi#include <dev/netmap/netmap_kern.h>
118259412Sluigi#include <dev/netmap/netmap_mem2.h>
119259412Sluigi
120259412Sluigi#ifdef WITH_VALE
121259412Sluigi
122259412Sluigi/*
123259412Sluigi * system parameters (most of them in netmap_kern.h)
124259412Sluigi * NM_NAME	prefix for switch port names, default "vale"
125259412Sluigi * NM_BDG_MAXPORTS	number of ports
126259412Sluigi * NM_BRIDGES	max number of switches in the system.
127259412Sluigi *	XXX should become a sysctl or tunable
128259412Sluigi *
129259412Sluigi * Switch ports are named valeX:Y where X is the switch name and Y
130259412Sluigi * is the port. If Y matches a physical interface name, the port is
131259412Sluigi * connected to a physical device.
132259412Sluigi *
133259412Sluigi * Unlike physical interfaces, switch ports use their own memory region
134259412Sluigi * for rings and buffers.
135259412Sluigi * The virtual interfaces use per-queue lock instead of core lock.
136259412Sluigi * In the tx loop, we aggregate traffic in batches to make all operations
137259412Sluigi * faster. The batch size is bridge_batch.
138259412Sluigi */
/* number of bridges */
#define	NM_BRIDGES		8

/* ring geometry */
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */

/* forwarding table entries */
#define NM_BDG_HASH		1024

/*
 * Forwarding buffer: NM_BDG_BATCH entries, plus room for one chain
 * of at most NM_MULTISEG bufs. The sum is the actual size of the tables.
 */
#define NM_BDG_BATCH		1024
#define NM_MULTISEG		64
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)

/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
150259412Sluigi
151259412Sluigi
152259412Sluigi/*
153259412Sluigi * bridge_batch is set via sysctl to the max batch size to be
154259412Sluigi * used in the bridge. The actual value may be larger as the
155259412Sluigi * last packet in the block may overflow the size.
156259412Sluigi */
157259412Sluigiint bridge_batch = NM_BDG_BATCH; /* bridge batch size */
158259412SluigiSYSCTL_DECL(_dev_netmap);
159259412SluigiSYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
160259412Sluigi
161259412Sluigi
/*
 * Forward declarations of routines referenced before their definition.
 */
static int	bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp);
static int	bdg_netmap_reg(struct netmap_adapter *na, int onoff);
static int	netmap_bwrap_attach(struct ifnet *, struct ifnet *);
static int	netmap_bwrap_register(struct netmap_adapter *, int onoff);
int		kern_netmap_regif(struct nmreq *nmr);
167259412Sluigi
/*
 * Forwarding entry for a bridge.
 *
 * Every transmit queue collects a batch of packets in an array of
 * these entries before forwarding them. Packets headed to the same
 * destination are chained through ft_next.
 * ft_frags and ft_next are meaningful only on the first fragment
 * of a packet.
 */
struct nm_bdg_fwd {
	void		*ft_buf;	/* netmap or indirect buffer */
	uint8_t		ft_frags;	/* how many fragments (only on 1st frag) */
	uint8_t		_ft_port;	/* dst port (unused) */
	uint16_t	ft_flags;	/* flags, e.g. indirect */
	uint16_t	ft_len;		/* src fragment len */
	uint16_t	ft_next;	/* next packet to same destination */
};
182259412Sluigi
/*
 * Per-output-interface queue descriptor, used to build the list of
 * entries destined to that interface.
 * bq_len counts output buffers (coalescing may happen during the copy).
 */
struct nm_bdg_q {
	uint16_t	bq_head;
	uint16_t	bq_tail;
	uint32_t	bq_len;		/* number of buffers */
};
193259412Sluigi
/*
 * One entry of the bridge forwarding table (MAC + ports).
 * XXX revise this
 */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;
};
199259412Sluigi
200259412Sluigi/*
201259412Sluigi * nm_bridge is a descriptor for a VALE switch.
202259412Sluigi * Interfaces for a bridge are all in bdg_ports[].
203259412Sluigi * The array has fixed size, an empty entry does not terminate
204259412Sluigi * the search, but lookups only occur on attach/detach so we
205259412Sluigi * don't mind if they are slow.
206259412Sluigi *
207259412Sluigi * The bridge is non blocking on the transmit ports: excess
208259412Sluigi * packets are dropped if there is no room on the output port.
209259412Sluigi *
210259412Sluigi * bdg_lock protects accesses to the bdg_ports array.
211259412Sluigi * This is a rw lock (or equivalent).
212259412Sluigi */
213259412Sluigistruct nm_bridge {
214259412Sluigi	/* XXX what is the proper alignment/layout ? */
215259412Sluigi	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
216259412Sluigi	int		bdg_namelen;
217259412Sluigi	uint32_t	bdg_active_ports; /* 0 means free */
218259412Sluigi	char		bdg_basename[IFNAMSIZ];
219259412Sluigi
220259412Sluigi	/* Indexes of active ports (up to active_ports)
221259412Sluigi	 * and all other remaining ports.
222259412Sluigi	 */
223259412Sluigi	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];
224259412Sluigi
225259412Sluigi	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
226259412Sluigi
227259412Sluigi
228259412Sluigi	/*
229259412Sluigi	 * The function to decide the destination port.
230259412Sluigi	 * It returns either of an index of the destination port,
231259412Sluigi	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
232259412Sluigi	 * forward this packet.  ring_nr is the source ring index, and the
233259412Sluigi	 * function may overwrite this value to forward this packet to a
234259412Sluigi	 * different ring index.
235259412Sluigi	 * This function must be set by netmap_bdgctl().
236259412Sluigi	 */
237259412Sluigi	bdg_lookup_fn_t nm_bdg_lookup;
238259412Sluigi
239259412Sluigi	/* the forwarding table, MAC+ports.
240259412Sluigi	 * XXX should be changed to an argument to be passed to
241259412Sluigi	 * the lookup function, and allocated on attach
242259412Sluigi	 */
243259412Sluigi	struct nm_hash_ent ht[NM_BDG_HASH];
244259412Sluigi};
245259412Sluigi
246259412Sluigi
/*
 * XXX in principle nm_bridges could be created dynamically.
 * Right now we have a static array, and deletions are protected
 * by an exclusive lock.
 */
252259412Sluigistruct nm_bridge nm_bridges[NM_BRIDGES];
253259412Sluigi
254259412Sluigi
/*
 * A few functions to tell which kind of port we are using.
 * XXX should we hold a lock ?
 *
 * nma_is_vp()		virtual port
 * nma_is_host()	port connected to the host stack
 * nma_is_hw()		port connected to a NIC
 * nma_is_generic()	generic netmap adapter XXX stop this madness
 * nma_is_bwrap()	adapter registered through netmap_bwrap_register
 */
264259412Sluigistatic __inline int
265259412Sluiginma_is_vp(struct netmap_adapter *na)
266259412Sluigi{
267259412Sluigi	return na->nm_register == bdg_netmap_reg;
268259412Sluigi}
269259412Sluigi
270259412Sluigi
271259412Sluigistatic __inline int
272259412Sluiginma_is_host(struct netmap_adapter *na)
273259412Sluigi{
274259412Sluigi	return na->nm_register == NULL;
275259412Sluigi}
276259412Sluigi
277259412Sluigi
/*
 * Non-zero if na is none of: virtual port, host port (nm_register
 * is NULL for those), generic adapter.
 */
static __inline int
nma_is_hw(struct netmap_adapter *na)
{
	if (nma_is_vp(na) || nma_is_host(na))
		return 0;
	return !nma_is_generic(na);
}
284259412Sluigi
285259412Sluigistatic __inline int
286259412Sluiginma_is_bwrap(struct netmap_adapter *na)
287259412Sluigi{
288259412Sluigi	return na->nm_register == netmap_bwrap_register;
289259412Sluigi}
290259412Sluigi
291259412Sluigi
292259412Sluigi
/*
 * Slightly optimized packet copy.
 *
 * For l below 1024 the length is rounded up to a multiple of 64 bytes
 * and copied 64 bytes at a time, which is often faster than dealing
 * with odd sizes. Both buffers must therefore have room for the
 * rounded-up length. For l >= 1024 exactly l bytes are copied with
 * memcpy(). Nothing is copied when l <= 0.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *from = _src;
	uint64_t *to = _dst;
	int blocks;

	if (unlikely(l >= 1024)) {
		memcpy(to, from, l);
		return;
	}
	/* one pass per started 64-byte block; zero passes when l <= 0 */
	for (blocks = (l + 63) / 64; likely(blocks > 0); blocks--) {
		to[0] = from[0];
		to[1] = from[1];
		to[2] = from[2];
		to[3] = from[3];
		to[4] = from[4];
		to[5] = from[5];
		to[6] = from[6];
		to[7] = from[7];
		from += 8;
		to += 8;
	}
}
321259412Sluigi
322259412Sluigi
323259412Sluigi
324259412Sluigi/*
325259412Sluigi * locate a bridge among the existing ones.
326259412Sluigi * MUST BE CALLED WITH NMG_LOCK()
327259412Sluigi *
328259412Sluigi * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
329259412Sluigi * We assume that this is called with a name of at least NM_NAME chars.
330259412Sluigi */
331259412Sluigistatic struct nm_bridge *
332259412Sluiginm_find_bridge(const char *name, int create)
333259412Sluigi{
334259412Sluigi	int i, l, namelen;
335259412Sluigi	struct nm_bridge *b = NULL;
336259412Sluigi
337259412Sluigi	NMG_LOCK_ASSERT();
338259412Sluigi
339259412Sluigi	namelen = strlen(NM_NAME);	/* base length */
340259412Sluigi	l = name ? strlen(name) : 0;		/* actual length */
341259412Sluigi	if (l < namelen) {
342259412Sluigi		D("invalid bridge name %s", name ? name : NULL);
343259412Sluigi		return NULL;
344259412Sluigi	}
345259412Sluigi	for (i = namelen + 1; i < l; i++) {
346259412Sluigi		if (name[i] == ':') {
347259412Sluigi			namelen = i;
348259412Sluigi			break;
349259412Sluigi		}
350259412Sluigi	}
351259412Sluigi	if (namelen >= IFNAMSIZ)
352259412Sluigi		namelen = IFNAMSIZ;
353259412Sluigi	ND("--- prefix is '%.*s' ---", namelen, name);
354259412Sluigi
355259412Sluigi	/* lookup the name, remember empty slot if there is one */
356259412Sluigi	for (i = 0; i < NM_BRIDGES; i++) {
357259412Sluigi		struct nm_bridge *x = nm_bridges + i;
358259412Sluigi
359259412Sluigi		if (x->bdg_active_ports == 0) {
360259412Sluigi			if (create && b == NULL)
361259412Sluigi				b = x;	/* record empty slot */
362259412Sluigi		} else if (x->bdg_namelen != namelen) {
363259412Sluigi			continue;
364259412Sluigi		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
365259412Sluigi			ND("found '%.*s' at %d", namelen, name, i);
366259412Sluigi			b = x;
367259412Sluigi			break;
368259412Sluigi		}
369259412Sluigi	}
370259412Sluigi	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
371259412Sluigi		/* initialize the bridge */
372259412Sluigi		strncpy(b->bdg_basename, name, namelen);
373259412Sluigi		ND("create new bridge %s with ports %d", b->bdg_basename,
374259412Sluigi			b->bdg_active_ports);
375259412Sluigi		b->bdg_namelen = namelen;
376259412Sluigi		b->bdg_active_ports = 0;
377259412Sluigi		for (i = 0; i < NM_BDG_MAXPORTS; i++)
378259412Sluigi			b->bdg_port_index[i] = i;
379259412Sluigi		/* set the default function */
380259412Sluigi		b->nm_bdg_lookup = netmap_bdg_learning;
381259412Sluigi		/* reset the MAC address table */
382259412Sluigi		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
383259412Sluigi	}
384259412Sluigi	return b;
385259412Sluigi}
386259412Sluigi
387259412Sluigi
388259412Sluigi/*
389259412Sluigi * Free the forwarding tables for rings attached to switch ports.
390259412Sluigi */
391259412Sluigistatic void
392259412Sluiginm_free_bdgfwd(struct netmap_adapter *na)
393259412Sluigi{
394259412Sluigi	int nrings, i;
395259412Sluigi	struct netmap_kring *kring;
396259412Sluigi
397259412Sluigi	NMG_LOCK_ASSERT();
398259412Sluigi	nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
399259412Sluigi	kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
400259412Sluigi	for (i = 0; i < nrings; i++) {
401259412Sluigi		if (kring[i].nkr_ft) {
402259412Sluigi			free(kring[i].nkr_ft, M_DEVBUF);
403259412Sluigi			kring[i].nkr_ft = NULL; /* protect from freeing twice */
404259412Sluigi		}
405259412Sluigi	}
406259412Sluigi}
407259412Sluigi
408259412Sluigi
409259412Sluigi/*
410259412Sluigi * Allocate the forwarding tables for the rings attached to the bridge ports.
411259412Sluigi */
412259412Sluigistatic int
413259412Sluiginm_alloc_bdgfwd(struct netmap_adapter *na)
414259412Sluigi{
415259412Sluigi	int nrings, l, i, num_dstq;
416259412Sluigi	struct netmap_kring *kring;
417259412Sluigi
418259412Sluigi	NMG_LOCK_ASSERT();
419259412Sluigi	/* all port:rings + broadcast */
420259412Sluigi	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
421259412Sluigi	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
422259412Sluigi	l += sizeof(struct nm_bdg_q) * num_dstq;
423259412Sluigi	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
424259412Sluigi
425259412Sluigi	nrings = na->num_tx_rings + 1;
426259412Sluigi	kring = na->tx_rings;
427259412Sluigi	for (i = 0; i < nrings; i++) {
428259412Sluigi		struct nm_bdg_fwd *ft;
429259412Sluigi		struct nm_bdg_q *dstq;
430259412Sluigi		int j;
431259412Sluigi
432259412Sluigi		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
433259412Sluigi		if (!ft) {
434259412Sluigi			nm_free_bdgfwd(na);
435259412Sluigi			return ENOMEM;
436259412Sluigi		}
437259412Sluigi		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
438259412Sluigi		for (j = 0; j < num_dstq; j++) {
439259412Sluigi			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
440259412Sluigi			dstq[j].bq_len = 0;
441259412Sluigi		}
442259412Sluigi		kring[i].nkr_ft = ft;
443259412Sluigi	}
444259412Sluigi	return 0;
445259412Sluigi}
446259412Sluigi
447259412Sluigi
448259412Sluigistatic void
449259412Sluiginetmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
450259412Sluigi{
451259412Sluigi	int s_hw = hw, s_sw = sw;
452259412Sluigi	int i, lim =b->bdg_active_ports;
453259412Sluigi	uint8_t tmp[NM_BDG_MAXPORTS];
454259412Sluigi
455259412Sluigi	/*
456259412Sluigi	New algorithm:
457259412Sluigi	make a copy of bdg_port_index;
458259412Sluigi	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
459259412Sluigi	in the array of bdg_port_index, replacing them with
460259412Sluigi	entries from the bottom of the array;
461259412Sluigi	decrement bdg_active_ports;
462259412Sluigi	acquire BDG_WLOCK() and copy back the array.
463259412Sluigi	 */
464259412Sluigi
465259412Sluigi	D("detach %d and %d (lim %d)", hw, sw, lim);
466259412Sluigi	/* make a copy of the list of active ports, update it,
467259412Sluigi	 * and then copy back within BDG_WLOCK().
468259412Sluigi	 */
469259412Sluigi	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
470259412Sluigi	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
471259412Sluigi		if (hw >= 0 && tmp[i] == hw) {
472259412Sluigi			ND("detach hw %d at %d", hw, i);
473259412Sluigi			lim--; /* point to last active port */
474259412Sluigi			tmp[i] = tmp[lim]; /* swap with i */
475259412Sluigi			tmp[lim] = hw;	/* now this is inactive */
476259412Sluigi			hw = -1;
477259412Sluigi		} else if (sw >= 0 && tmp[i] == sw) {
478259412Sluigi			ND("detach sw %d at %d", sw, i);
479259412Sluigi			lim--;
480259412Sluigi			tmp[i] = tmp[lim];
481259412Sluigi			tmp[lim] = sw;
482259412Sluigi			sw = -1;
483259412Sluigi		} else {
484259412Sluigi			i++;
485259412Sluigi		}
486259412Sluigi	}
487259412Sluigi	if (hw >= 0 || sw >= 0) {
488259412Sluigi		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
489259412Sluigi	}
490259412Sluigi
491259412Sluigi	BDG_WLOCK(b);
492259412Sluigi	b->bdg_ports[s_hw] = NULL;
493259412Sluigi	if (s_sw >= 0) {
494259412Sluigi		b->bdg_ports[s_sw] = NULL;
495259412Sluigi	}
496259412Sluigi	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
497259412Sluigi	b->bdg_active_ports = lim;
498259412Sluigi	BDG_WUNLOCK(b);
499259412Sluigi
500259412Sluigi	ND("now %d active ports", lim);
501259412Sluigi	if (lim == 0) {
502259412Sluigi		ND("marking bridge %s as free", b->bdg_basename);
503259412Sluigi		b->nm_bdg_lookup = NULL;
504259412Sluigi	}
505259412Sluigi}
506259412Sluigi
507259412Sluigistatic void
508259412Sluiginetmap_adapter_vp_dtor(struct netmap_adapter *na)
509259412Sluigi{
510259412Sluigi	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
511259412Sluigi	struct nm_bridge *b = vpna->na_bdg;
512259412Sluigi	struct ifnet *ifp = na->ifp;
513259412Sluigi
514259412Sluigi	ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount);
515259412Sluigi
516259412Sluigi	if (b) {
517259412Sluigi		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
518259412Sluigi	}
519259412Sluigi
520259412Sluigi	bzero(ifp, sizeof(*ifp));
521259412Sluigi	free(ifp, M_DEVBUF);
522259412Sluigi	na->ifp = NULL;
523259412Sluigi}
524259412Sluigi
525259412Sluigiint
526259412Sluiginetmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
527259412Sluigi{
528259412Sluigi	const char *name = nmr->nr_name;
529259412Sluigi	struct ifnet *ifp;
530259412Sluigi	int error = 0;
531259412Sluigi	struct netmap_adapter *ret;
532259412Sluigi	struct netmap_vp_adapter *vpna;
533259412Sluigi	struct nm_bridge *b;
534259412Sluigi	int i, j, cand = -1, cand2 = -1;
535259412Sluigi	int needed;
536259412Sluigi
537259412Sluigi	*na = NULL;     /* default return value */
538259412Sluigi
539259412Sluigi	/* first try to see if this is a bridge port. */
540259412Sluigi	NMG_LOCK_ASSERT();
541259412Sluigi	if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
542259412Sluigi		return 0;  /* no error, but no VALE prefix */
543259412Sluigi	}
544259412Sluigi
545259412Sluigi	b = nm_find_bridge(name, create);
546259412Sluigi	if (b == NULL) {
547259412Sluigi		D("no bridges available for '%s'", name);
548259412Sluigi		return (ENXIO);
549259412Sluigi	}
550259412Sluigi
551259412Sluigi	/* Now we are sure that name starts with the bridge's name,
552259412Sluigi	 * lookup the port in the bridge. We need to scan the entire
553259412Sluigi	 * list. It is not important to hold a WLOCK on the bridge
554259412Sluigi	 * during the search because NMG_LOCK already guarantees
555259412Sluigi	 * that there are no other possible writers.
556259412Sluigi	 */
557259412Sluigi
558259412Sluigi	/* lookup in the local list of ports */
559259412Sluigi	for (j = 0; j < b->bdg_active_ports; j++) {
560259412Sluigi		i = b->bdg_port_index[j];
561259412Sluigi		vpna = b->bdg_ports[i];
562259412Sluigi		// KASSERT(na != NULL);
563259412Sluigi		ifp = vpna->up.ifp;
564259412Sluigi		/* XXX make sure the name only contains one : */
565259412Sluigi		if (!strcmp(NM_IFPNAME(ifp), name)) {
566259412Sluigi			netmap_adapter_get(&vpna->up);
567259412Sluigi			ND("found existing if %s refs %d", name,
568259412Sluigi				vpna->na_bdg_refcount);
569259412Sluigi			*na = (struct netmap_adapter *)vpna;
570259412Sluigi			return 0;
571259412Sluigi		}
572259412Sluigi	}
573259412Sluigi	/* not found, should we create it? */
574259412Sluigi	if (!create)
575259412Sluigi		return ENXIO;
576259412Sluigi	/* yes we should, see if we have space to attach entries */
577259412Sluigi	needed = 2; /* in some cases we only need 1 */
578259412Sluigi	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
579259412Sluigi		D("bridge full %d, cannot create new port", b->bdg_active_ports);
580259412Sluigi		return EINVAL;
581259412Sluigi	}
582259412Sluigi	/* record the next two ports available, but do not allocate yet */
583259412Sluigi	cand = b->bdg_port_index[b->bdg_active_ports];
584259412Sluigi	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
585259412Sluigi	ND("+++ bridge %s port %s used %d avail %d %d",
586259412Sluigi		b->bdg_basename, name, b->bdg_active_ports, cand, cand2);
587259412Sluigi
588259412Sluigi	/*
589259412Sluigi	 * try see if there is a matching NIC with this name
590259412Sluigi	 * (after the bridge's name)
591259412Sluigi	 */
592259412Sluigi	ifp = ifunit_ref(name + b->bdg_namelen + 1);
593259412Sluigi	if (!ifp) { /* this is a virtual port */
594259412Sluigi		if (nmr->nr_cmd) {
595259412Sluigi			/* nr_cmd must be 0 for a virtual port */
596259412Sluigi			return EINVAL;
597259412Sluigi		}
598259412Sluigi
599259412Sluigi	 	/* create a struct ifnet for the new port.
600259412Sluigi		 * need M_NOWAIT as we are under nma_lock
601259412Sluigi		 */
602259412Sluigi		ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
603259412Sluigi		if (!ifp)
604259412Sluigi			return ENOMEM;
605259412Sluigi
606259412Sluigi		strcpy(ifp->if_xname, name);
607259412Sluigi		/* bdg_netmap_attach creates a struct netmap_adapter */
608259412Sluigi		error = bdg_netmap_attach(nmr, ifp);
609259412Sluigi		if (error) {
610259412Sluigi			D("error %d", error);
611259412Sluigi			free(ifp, M_DEVBUF);
612259412Sluigi			return error;
613259412Sluigi		}
614259412Sluigi		ret = NA(ifp);
615259412Sluigi		cand2 = -1;	/* only need one port */
616259412Sluigi	} else {  /* this is a NIC */
617259412Sluigi		struct ifnet *fake_ifp;
618259412Sluigi
619259412Sluigi		error = netmap_get_hw_na(ifp, &ret);
620259412Sluigi		if (error || ret == NULL)
621259412Sluigi			goto out;
622259412Sluigi
623259412Sluigi		/* make sure the NIC is not already in use */
624259412Sluigi		if (NETMAP_OWNED_BY_ANY(ret)) {
625259412Sluigi			D("NIC %s busy, cannot attach to bridge",
626259412Sluigi				NM_IFPNAME(ifp));
627259412Sluigi			error = EINVAL;
628259412Sluigi			goto out;
629259412Sluigi		}
630259412Sluigi		/* create a fake interface */
631259412Sluigi		fake_ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
632259412Sluigi		if (!fake_ifp) {
633259412Sluigi			error = ENOMEM;
634259412Sluigi			goto out;
635259412Sluigi		}
636259412Sluigi		strcpy(fake_ifp->if_xname, name);
637259412Sluigi		error = netmap_bwrap_attach(fake_ifp, ifp);
638259412Sluigi		if (error) {
639259412Sluigi			free(fake_ifp, M_DEVBUF);
640259412Sluigi			goto out;
641259412Sluigi		}
642259412Sluigi		ret = NA(fake_ifp);
643259412Sluigi		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
644259412Sluigi			cand2 = -1; /* only need one port */
645259412Sluigi		if_rele(ifp);
646259412Sluigi	}
647259412Sluigi	vpna = (struct netmap_vp_adapter *)ret;
648259412Sluigi
649259412Sluigi	BDG_WLOCK(b);
650259412Sluigi	vpna->bdg_port = cand;
651259412Sluigi	ND("NIC  %p to bridge port %d", vpna, cand);
652259412Sluigi	/* bind the port to the bridge (virtual ports are not active) */
653259412Sluigi	b->bdg_ports[cand] = vpna;
654259412Sluigi	vpna->na_bdg = b;
655259412Sluigi	b->bdg_active_ports++;
656259412Sluigi	if (cand2 >= 0) {
657259412Sluigi		struct netmap_vp_adapter *hostna = vpna + 1;
658259412Sluigi		/* also bind the host stack to the bridge */
659259412Sluigi		b->bdg_ports[cand2] = hostna;
660259412Sluigi		hostna->bdg_port = cand2;
661259412Sluigi		hostna->na_bdg = b;
662259412Sluigi		b->bdg_active_ports++;
663259412Sluigi		ND("host %p to bridge port %d", hostna, cand2);
664259412Sluigi	}
665259412Sluigi	ND("if %s refs %d", name, vpna->up.na_refcount);
666259412Sluigi	BDG_WUNLOCK(b);
667259412Sluigi	*na = ret;
668259412Sluigi	netmap_adapter_get(ret);
669259412Sluigi	return 0;
670259412Sluigi
671259412Sluigiout:
672259412Sluigi	if_rele(ifp);
673259412Sluigi
674259412Sluigi	return error;
675259412Sluigi}
676259412Sluigi
677259412Sluigi
678259412Sluigi/* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
679259412Sluigistatic int
680259412Sluiginm_bdg_attach(struct nmreq *nmr)
681259412Sluigi{
682259412Sluigi	struct netmap_adapter *na;
683259412Sluigi	struct netmap_if *nifp;
684259412Sluigi	struct netmap_priv_d *npriv;
685259412Sluigi	struct netmap_bwrap_adapter *bna;
686259412Sluigi	int error;
687259412Sluigi
688259412Sluigi	npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
689259412Sluigi	if (npriv == NULL)
690259412Sluigi		return ENOMEM;
691259412Sluigi	NMG_LOCK();
692259412Sluigi	/* XXX probably netmap_get_bdg_na() */
693259412Sluigi	error = netmap_get_na(nmr, &na, 1 /* create if not exists */);
694259412Sluigi	if (error) /* no device, or another bridge or user owns the device */
695259412Sluigi		goto unlock_exit;
696259412Sluigi	/* netmap_get_na() sets na_bdg if this is a physical interface
697259412Sluigi	 * that we can attach to a switch.
698259412Sluigi	 */
699259412Sluigi	if (!nma_is_bwrap(na)) {
700259412Sluigi		/* got reference to a virtual port or direct access to a NIC.
701259412Sluigi		 * perhaps specified no bridge prefix or wrong NIC name
702259412Sluigi		 */
703259412Sluigi		error = EINVAL;
704259412Sluigi		goto unref_exit;
705259412Sluigi	}
706259412Sluigi
707259412Sluigi	if (na->active_fds > 0) { /* already registered */
708259412Sluigi		error = EBUSY;
709259412Sluigi		goto unref_exit;
710259412Sluigi	}
711259412Sluigi
712259412Sluigi	nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, &error);
713259412Sluigi	if (!nifp) {
714259412Sluigi		goto unref_exit;
715259412Sluigi	}
716259412Sluigi
717259412Sluigi	bna = (struct netmap_bwrap_adapter*)na;
718259412Sluigi	bna->na_kpriv = npriv;
719259412Sluigi	NMG_UNLOCK();
720259412Sluigi	ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp));
721259412Sluigi	return 0;
722259412Sluigi
723259412Sluigiunref_exit:
724259412Sluigi	netmap_adapter_put(na);
725259412Sluigiunlock_exit:
726259412Sluigi	NMG_UNLOCK();
727259412Sluigi	bzero(npriv, sizeof(*npriv));
728259412Sluigi	free(npriv, M_DEVBUF);
729259412Sluigi	return error;
730259412Sluigi}
731259412Sluigi
732259412Sluigistatic int
733259412Sluiginm_bdg_detach(struct nmreq *nmr)
734259412Sluigi{
735259412Sluigi	struct netmap_adapter *na;
736259412Sluigi	int error;
737259412Sluigi	struct netmap_bwrap_adapter *bna;
738259412Sluigi	int last_instance;
739259412Sluigi
740259412Sluigi	NMG_LOCK();
741259412Sluigi	error = netmap_get_na(nmr, &na, 0 /* don't create */);
742259412Sluigi	if (error) { /* no device, or another bridge or user owns the device */
743259412Sluigi		goto unlock_exit;
744259412Sluigi	}
745259412Sluigi	if (!nma_is_bwrap(na)) {
746259412Sluigi		/* got reference to a virtual port or direct access to a NIC.
747259412Sluigi		 * perhaps specified no bridge's prefix or wrong NIC's name
748259412Sluigi		 */
749259412Sluigi		error = EINVAL;
750259412Sluigi		goto unref_exit;
751259412Sluigi	}
752259412Sluigi	bna = (struct netmap_bwrap_adapter *)na;
753259412Sluigi
754259412Sluigi	if (na->active_fds == 0) { /* not registered */
755259412Sluigi		error = EINVAL;
756259412Sluigi		goto unref_exit;
757259412Sluigi	}
758259412Sluigi
759259412Sluigi	last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */
760259412Sluigi	if (!last_instance) {
761259412Sluigi		D("--- error, trying to detach an entry with active mmaps");
762259412Sluigi		error = EINVAL;
763259412Sluigi	} else {
764259412Sluigi		struct netmap_priv_d *npriv = bna->na_kpriv;
765259412Sluigi
766259412Sluigi		bna->na_kpriv = NULL;
767259412Sluigi		D("deleting priv");
768259412Sluigi
769259412Sluigi		bzero(npriv, sizeof(*npriv));
770259412Sluigi		free(npriv, M_DEVBUF);
771259412Sluigi	}
772259412Sluigi
773259412Sluigiunref_exit:
774259412Sluigi	netmap_adapter_put(na);
775259412Sluigiunlock_exit:
776259412Sluigi	NMG_UNLOCK();
777259412Sluigi	return error;
778259412Sluigi
779259412Sluigi}
780259412Sluigi
781259412Sluigi
782259412Sluigi/* exported to kernel callers, e.g. OVS ?
783259412Sluigi * Entry point.
784259412Sluigi * Called without NMG_LOCK.
785259412Sluigi */
786259412Sluigiint
787259412Sluiginetmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
788259412Sluigi{
789259412Sluigi	struct nm_bridge *b;
790259412Sluigi	struct netmap_adapter *na;
791259412Sluigi	struct netmap_vp_adapter *vpna;
792259412Sluigi	struct ifnet *iter;
793259412Sluigi	char *name = nmr->nr_name;
794259412Sluigi	int cmd = nmr->nr_cmd, namelen = strlen(name);
795259412Sluigi	int error = 0, i, j;
796259412Sluigi
797259412Sluigi	switch (cmd) {
798259412Sluigi	case NETMAP_BDG_ATTACH:
799259412Sluigi		error = nm_bdg_attach(nmr);
800259412Sluigi		break;
801259412Sluigi
802259412Sluigi	case NETMAP_BDG_DETACH:
803259412Sluigi		error = nm_bdg_detach(nmr);
804259412Sluigi		break;
805259412Sluigi
806259412Sluigi	case NETMAP_BDG_LIST:
807259412Sluigi		/* this is used to enumerate bridges and ports */
808259412Sluigi		if (namelen) { /* look up indexes of bridge and port */
809259412Sluigi			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
810259412Sluigi				error = EINVAL;
811259412Sluigi				break;
812259412Sluigi			}
813259412Sluigi			NMG_LOCK();
814259412Sluigi			b = nm_find_bridge(name, 0 /* don't create */);
815259412Sluigi			if (!b) {
816259412Sluigi				error = ENOENT;
817259412Sluigi				NMG_UNLOCK();
818259412Sluigi				break;
819259412Sluigi			}
820259412Sluigi
821259412Sluigi			error = ENOENT;
822259412Sluigi			for (j = 0; j < b->bdg_active_ports; j++) {
823259412Sluigi				i = b->bdg_port_index[j];
824259412Sluigi				vpna = b->bdg_ports[i];
825259412Sluigi				if (vpna == NULL) {
826259412Sluigi					D("---AAAAAAAAARGH-------");
827259412Sluigi					continue;
828259412Sluigi				}
829259412Sluigi				iter = vpna->up.ifp;
830259412Sluigi				/* the former and the latter identify a
831259412Sluigi				 * virtual port and a NIC, respectively
832259412Sluigi				 */
833259412Sluigi				if (!strcmp(iter->if_xname, name)) {
834259412Sluigi					/* bridge index */
835259412Sluigi					nmr->nr_arg1 = b - nm_bridges;
836259412Sluigi					nmr->nr_arg2 = i; /* port index */
837259412Sluigi					error = 0;
838259412Sluigi					break;
839259412Sluigi				}
840259412Sluigi			}
841259412Sluigi			NMG_UNLOCK();
842259412Sluigi		} else {
843259412Sluigi			/* return the first non-empty entry starting from
844259412Sluigi			 * bridge nr_arg1 and port nr_arg2.
845259412Sluigi			 *
846259412Sluigi			 * Users can detect the end of the same bridge by
847259412Sluigi			 * seeing the new and old value of nr_arg1, and can
848259412Sluigi			 * detect the end of all the bridge by error != 0
849259412Sluigi			 */
850259412Sluigi			i = nmr->nr_arg1;
851259412Sluigi			j = nmr->nr_arg2;
852259412Sluigi
853259412Sluigi			NMG_LOCK();
854259412Sluigi			for (error = ENOENT; i < NM_BRIDGES; i++) {
855259412Sluigi				b = nm_bridges + i;
856259412Sluigi				if (j >= b->bdg_active_ports) {
857259412Sluigi					j = 0; /* following bridges scan from 0 */
858259412Sluigi					continue;
859259412Sluigi				}
860259412Sluigi				nmr->nr_arg1 = i;
861259412Sluigi				nmr->nr_arg2 = j;
862259412Sluigi				j = b->bdg_port_index[j];
863259412Sluigi				vpna = b->bdg_ports[j];
864259412Sluigi				iter = vpna->up.ifp;
865259412Sluigi				strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
866259412Sluigi				error = 0;
867259412Sluigi				break;
868259412Sluigi			}
869259412Sluigi			NMG_UNLOCK();
870259412Sluigi		}
871259412Sluigi		break;
872259412Sluigi
873259412Sluigi	case NETMAP_BDG_LOOKUP_REG:
874259412Sluigi		/* register a lookup function to the given bridge.
875259412Sluigi		 * nmr->nr_name may be just bridge's name (including ':'
876259412Sluigi		 * if it is not just NM_NAME).
877259412Sluigi		 */
878259412Sluigi		if (!func) {
879259412Sluigi			error = EINVAL;
880259412Sluigi			break;
881259412Sluigi		}
882259412Sluigi		NMG_LOCK();
883259412Sluigi		b = nm_find_bridge(name, 0 /* don't create */);
884259412Sluigi		if (!b) {
885259412Sluigi			error = EINVAL;
886259412Sluigi		} else {
887259412Sluigi			b->nm_bdg_lookup = func;
888259412Sluigi		}
889259412Sluigi		NMG_UNLOCK();
890259412Sluigi		break;
891259412Sluigi
892259412Sluigi	case NETMAP_BDG_OFFSET:
893259412Sluigi		NMG_LOCK();
894259412Sluigi		error = netmap_get_bdg_na(nmr, &na, 0);
895259412Sluigi		if (!error) {
896259412Sluigi			vpna = (struct netmap_vp_adapter *)na;
897259412Sluigi			if (nmr->nr_arg1 > NETMAP_BDG_MAX_OFFSET)
898259412Sluigi				nmr->nr_arg1 = NETMAP_BDG_MAX_OFFSET;
899259412Sluigi			vpna->offset = nmr->nr_arg1;
900259412Sluigi			D("Using offset %d for %p", vpna->offset, vpna);
901259412Sluigi		}
902259412Sluigi		NMG_UNLOCK();
903259412Sluigi		break;
904259412Sluigi
905259412Sluigi	default:
906259412Sluigi		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
907259412Sluigi		error = EINVAL;
908259412Sluigi		break;
909259412Sluigi	}
910259412Sluigi	return error;
911259412Sluigi}
912259412Sluigi
913259412Sluigi
914259412Sluigistatic int
915259412Sluiginetmap_vp_krings_create(struct netmap_adapter *na)
916259412Sluigi{
917259412Sluigi	u_int ntx, nrx, tailroom;
918259412Sluigi	int error, i;
919259412Sluigi	uint32_t *leases;
920259412Sluigi
921259412Sluigi	/* XXX vps do not need host rings,
922259412Sluigi	 * but we crash if we don't have one
923259412Sluigi	 */
924259412Sluigi	ntx = na->num_tx_rings + 1;
925259412Sluigi	nrx = na->num_rx_rings + 1;
926259412Sluigi
927259412Sluigi	/*
928259412Sluigi	 * Leases are attached to RX rings on vale ports
929259412Sluigi	 */
930259412Sluigi	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
931259412Sluigi
932259412Sluigi	error = netmap_krings_create(na, ntx, nrx, tailroom);
933259412Sluigi	if (error)
934259412Sluigi		return error;
935259412Sluigi
936259412Sluigi	leases = na->tailroom;
937259412Sluigi
938259412Sluigi	for (i = 0; i < nrx; i++) { /* Receive rings */
939259412Sluigi		na->rx_rings[i].nkr_leases = leases;
940259412Sluigi		leases += na->num_rx_desc;
941259412Sluigi	}
942259412Sluigi
943259412Sluigi	error = nm_alloc_bdgfwd(na);
944259412Sluigi	if (error) {
945259412Sluigi		netmap_krings_delete(na);
946259412Sluigi		return error;
947259412Sluigi	}
948259412Sluigi
949259412Sluigi	return 0;
950259412Sluigi}
951259412Sluigi
/*
 * Counterpart of netmap_vp_krings_create(): release the forwarding
 * work area first, then the krings themselves.
 */
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);
	netmap_krings_delete(na);
}
958259412Sluigi
959259412Sluigi
960259412Sluigistatic int
961259412Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
962259412Sluigi	struct netmap_vp_adapter *na, u_int ring_nr);
963259412Sluigi
964259412Sluigi
965259412Sluigi/*
966259412Sluigi * Grab packets from a kring, move them into the ft structure
967259412Sluigi * associated to the tx (input) port. Max one instance per port,
968259412Sluigi * filtered on input (ioctl, poll or XXX).
969259412Sluigi * Returns the next position in the ring.
970259412Sluigi */
971259412Sluigistatic int
972259412Sluiginm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
973259412Sluigi	struct netmap_kring *kring, u_int end)
974259412Sluigi{
975259412Sluigi	struct netmap_ring *ring = kring->ring;
976259412Sluigi	struct nm_bdg_fwd *ft;
977259412Sluigi	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
978259412Sluigi	u_int ft_i = 0;	/* start from 0 */
979259412Sluigi	u_int frags = 1; /* how many frags ? */
980259412Sluigi	struct nm_bridge *b = na->na_bdg;
981259412Sluigi
982259412Sluigi	/* To protect against modifications to the bridge we acquire a
983259412Sluigi	 * shared lock, waiting if we can sleep (if the source port is
984259412Sluigi	 * attached to a user process) or with a trylock otherwise (NICs).
985259412Sluigi	 */
986259412Sluigi	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
987259412Sluigi	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
988259412Sluigi		BDG_RLOCK(b);
989259412Sluigi	else if (!BDG_RTRYLOCK(b))
990259412Sluigi		return 0;
991259412Sluigi	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
992259412Sluigi	ft = kring->nkr_ft;
993259412Sluigi
994259412Sluigi	for (; likely(j != end); j = nm_next(j, lim)) {
995259412Sluigi		struct netmap_slot *slot = &ring->slot[j];
996259412Sluigi		char *buf;
997259412Sluigi
998259412Sluigi		ft[ft_i].ft_len = slot->len;
999259412Sluigi		ft[ft_i].ft_flags = slot->flags;
1000259412Sluigi
1001259412Sluigi		ND("flags is 0x%x", slot->flags);
1002259412Sluigi		/* this slot goes into a list so initialize the link field */
1003259412Sluigi		ft[ft_i].ft_next = NM_FT_NULL;
1004259412Sluigi		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
1005259412Sluigi			(void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
1006259412Sluigi		prefetch(buf);
1007259412Sluigi		++ft_i;
1008259412Sluigi		if (slot->flags & NS_MOREFRAG) {
1009259412Sluigi			frags++;
1010259412Sluigi			continue;
1011259412Sluigi		}
1012259412Sluigi		if (unlikely(netmap_verbose && frags > 1))
1013259412Sluigi			RD(5, "%d frags at %d", frags, ft_i - frags);
1014259412Sluigi		ft[ft_i - frags].ft_frags = frags;
1015259412Sluigi		frags = 1;
1016259412Sluigi		if (unlikely((int)ft_i >= bridge_batch))
1017259412Sluigi			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1018259412Sluigi	}
1019259412Sluigi	if (frags > 1) {
1020259412Sluigi		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1021259412Sluigi		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
1022259412Sluigi		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
1023259412Sluigi		ft[ft_i - frags].ft_frags = frags - 1;
1024259412Sluigi	}
1025259412Sluigi	if (ft_i)
1026259412Sluigi		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1027259412Sluigi	BDG_RUNLOCK(b);
1028259412Sluigi	return j;
1029259412Sluigi}
1030259412Sluigi
1031259412Sluigi
1032259412Sluigi/*
1033259412Sluigi *---- support for virtual bridge -----
1034259412Sluigi */
1035259412Sluigi
1036259412Sluigi/* ----- FreeBSD if_bridge hash function ------- */
1037259412Sluigi
1038259412Sluigi/*
1039259412Sluigi * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1040259412Sluigi * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1041259412Sluigi *
1042259412Sluigi * http://www.burtleburtle.net/bob/hash/spooky.html
1043259412Sluigi */
1044259412Sluigi#define mix(a, b, c)                                                    \
1045259412Sluigido {                                                                    \
1046259412Sluigi        a -= b; a -= c; a ^= (c >> 13);                                 \
1047259412Sluigi        b -= c; b -= a; b ^= (a << 8);                                  \
1048259412Sluigi        c -= a; c -= b; c ^= (b >> 13);                                 \
1049259412Sluigi        a -= b; a -= c; a ^= (c >> 12);                                 \
1050259412Sluigi        b -= c; b -= a; b ^= (a << 16);                                 \
1051259412Sluigi        c -= a; c -= b; c ^= (b >> 5);                                  \
1052259412Sluigi        a -= b; a -= c; a ^= (c >> 3);                                  \
1053259412Sluigi        b -= c; b -= a; b ^= (a << 10);                                 \
1054259412Sluigi        c -= a; c -= b; c ^= (b >> 15);                                 \
1055259412Sluigi} while (/*CONSTCOND*/0)
1056259412Sluigi
1057259412Sluigistatic __inline uint32_t
1058259412Sluiginm_bridge_rthash(const uint8_t *addr)
1059259412Sluigi{
1060259412Sluigi        uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
1061259412Sluigi
1062259412Sluigi        b += addr[5] << 8;
1063259412Sluigi        b += addr[4];
1064259412Sluigi        a += addr[3] << 24;
1065259412Sluigi        a += addr[2] << 16;
1066259412Sluigi        a += addr[1] << 8;
1067259412Sluigi        a += addr[0];
1068259412Sluigi
1069259412Sluigi        mix(a, b, c);
1070259412Sluigi#define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
1071259412Sluigi        return (c & BRIDGE_RTHASH_MASK);
1072259412Sluigi}
1073259412Sluigi
1074259412Sluigi#undef mix
1075259412Sluigi
1076259412Sluigi
1077259412Sluigistatic int
1078259412Sluigibdg_netmap_reg(struct netmap_adapter *na, int onoff)
1079259412Sluigi{
1080259412Sluigi	struct netmap_vp_adapter *vpna =
1081259412Sluigi		(struct netmap_vp_adapter*)na;
1082259412Sluigi	struct ifnet *ifp = na->ifp;
1083259412Sluigi
1084259412Sluigi	/* the interface is already attached to the bridge,
1085259412Sluigi	 * so we only need to toggle IFCAP_NETMAP.
1086259412Sluigi	 */
1087259412Sluigi	BDG_WLOCK(vpna->na_bdg);
1088259412Sluigi	if (onoff) {
1089259412Sluigi		ifp->if_capenable |= IFCAP_NETMAP;
1090259412Sluigi	} else {
1091259412Sluigi		ifp->if_capenable &= ~IFCAP_NETMAP;
1092259412Sluigi	}
1093259412Sluigi	BDG_WUNLOCK(vpna->na_bdg);
1094259412Sluigi	return 0;
1095259412Sluigi}
1096259412Sluigi
1097259412Sluigi
1098259412Sluigi/*
1099259412Sluigi * Lookup function for a learning bridge.
1100259412Sluigi * Update the hash table with the source address,
1101259412Sluigi * and then returns the destination port index, and the
1102259412Sluigi * ring in *dst_ring (at the moment, always use ring 0)
1103259412Sluigi */
1104259412Sluigiu_int
1105259412Sluiginetmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
1106259412Sluigi		struct netmap_vp_adapter *na)
1107259412Sluigi{
1108259412Sluigi	struct nm_hash_ent *ht = na->na_bdg->ht;
1109259412Sluigi	uint32_t sh, dh;
1110259412Sluigi	u_int dst, mysrc = na->bdg_port;
1111259412Sluigi	uint64_t smac, dmac;
1112259412Sluigi
1113259412Sluigi	if (buf_len < 14) {
1114259412Sluigi		D("invalid buf length %d", buf_len);
1115259412Sluigi		return NM_BDG_NOPORT;
1116259412Sluigi	}
1117259412Sluigi	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1118259412Sluigi	smac = le64toh(*(uint64_t *)(buf + 4));
1119259412Sluigi	smac >>= 16;
1120259412Sluigi
1121259412Sluigi	/*
1122259412Sluigi	 * The hash is somewhat expensive, there might be some
1123259412Sluigi	 * worthwhile optimizations here.
1124259412Sluigi	 */
1125259412Sluigi	if ((buf[6] & 1) == 0) { /* valid src */
1126259412Sluigi		uint8_t *s = buf+6;
1127259412Sluigi		sh = nm_bridge_rthash(s); // XXX hash of source
1128259412Sluigi		/* update source port forwarding entry */
1129259412Sluigi		ht[sh].mac = smac;	/* XXX expire ? */
1130259412Sluigi		ht[sh].ports = mysrc;
1131259412Sluigi		if (netmap_verbose)
1132259412Sluigi		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1133259412Sluigi			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1134259412Sluigi	}
1135259412Sluigi	dst = NM_BDG_BROADCAST;
1136259412Sluigi	if ((buf[0] & 1) == 0) { /* unicast */
1137259412Sluigi		dh = nm_bridge_rthash(buf); // XXX hash of dst
1138259412Sluigi		if (ht[dh].mac == dmac) {	/* found dst */
1139259412Sluigi			dst = ht[dh].ports;
1140259412Sluigi		}
1141259412Sluigi		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1142259412Sluigi	}
1143259412Sluigi	*dst_ring = 0;
1144259412Sluigi	return dst;
1145259412Sluigi}
1146259412Sluigi
1147259412Sluigi
1148259412Sluigi/*
1149259412Sluigi * This flush routine supports only unicast and broadcast but a large
1150259412Sluigi * number of ports, and lets us replace the learn and dispatch functions.
1151259412Sluigi */
1152259412Sluigiint
1153259412Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1154259412Sluigi		u_int ring_nr)
1155259412Sluigi{
1156259412Sluigi	struct nm_bdg_q *dst_ents, *brddst;
1157259412Sluigi	uint16_t num_dsts = 0, *dsts;
1158259412Sluigi	struct nm_bridge *b = na->na_bdg;
1159259412Sluigi	u_int i, j, me = na->bdg_port;
1160259412Sluigi
1161259412Sluigi	/*
1162259412Sluigi	 * The work area (pointed by ft) is followed by an array of
1163259412Sluigi	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1164259412Sluigi	 * queues per port plus one for the broadcast traffic.
1165259412Sluigi	 * Then we have an array of destination indexes.
1166259412Sluigi	 */
1167259412Sluigi	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1168259412Sluigi	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1169259412Sluigi
1170259412Sluigi	/* first pass: find a destination for each packet in the batch */
1171259412Sluigi	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1172259412Sluigi		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1173259412Sluigi		uint16_t dst_port, d_i;
1174259412Sluigi		struct nm_bdg_q *d;
1175259412Sluigi		uint8_t *buf = ft[i].ft_buf;
1176259412Sluigi		u_int len = ft[i].ft_len;
1177259412Sluigi
1178259412Sluigi		ND("slot %d frags %d", i, ft[i].ft_frags);
1179259412Sluigi		/* Drop the packet if the offset is not into the first
1180259412Sluigi		   fragment nor at the very beginning of the second. */
1181259412Sluigi		if (unlikely(na->offset > len))
1182259412Sluigi			continue;
1183259412Sluigi		if (len == na->offset) {
1184259412Sluigi			buf = ft[i+1].ft_buf;
1185259412Sluigi			len = ft[i+1].ft_len;
1186259412Sluigi		} else {
1187259412Sluigi			buf += na->offset;
1188259412Sluigi			len -= na->offset;
1189259412Sluigi		}
1190259412Sluigi		dst_port = b->nm_bdg_lookup(buf, len, &dst_ring, na);
1191259412Sluigi		if (netmap_verbose > 255)
1192259412Sluigi			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1193259412Sluigi		if (dst_port == NM_BDG_NOPORT)
1194259412Sluigi			continue; /* this packet is identified to be dropped */
1195259412Sluigi		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1196259412Sluigi			continue;
1197259412Sluigi		else if (dst_port == NM_BDG_BROADCAST)
1198259412Sluigi			dst_ring = 0; /* broadcasts always go to ring 0 */
1199259412Sluigi		else if (unlikely(dst_port == me ||
1200259412Sluigi		    !b->bdg_ports[dst_port]))
1201259412Sluigi			continue;
1202259412Sluigi
1203259412Sluigi		/* get a position in the scratch pad */
1204259412Sluigi		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1205259412Sluigi		d = dst_ents + d_i;
1206259412Sluigi
1207259412Sluigi		/* append the first fragment to the list */
1208259412Sluigi		if (d->bq_head == NM_FT_NULL) { /* new destination */
1209259412Sluigi			d->bq_head = d->bq_tail = i;
1210259412Sluigi			/* remember this position to be scanned later */
1211259412Sluigi			if (dst_port != NM_BDG_BROADCAST)
1212259412Sluigi				dsts[num_dsts++] = d_i;
1213259412Sluigi		} else {
1214259412Sluigi			ft[d->bq_tail].ft_next = i;
1215259412Sluigi			d->bq_tail = i;
1216259412Sluigi		}
1217259412Sluigi		d->bq_len += ft[i].ft_frags;
1218259412Sluigi	}
1219259412Sluigi
1220259412Sluigi	/*
1221259412Sluigi	 * Broadcast traffic goes to ring 0 on all destinations.
1222259412Sluigi	 * So we need to add these rings to the list of ports to scan.
1223259412Sluigi	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1224259412Sluigi	 * expensive. We should keep a compact list of active destinations
1225259412Sluigi	 * so we could shorten this loop.
1226259412Sluigi	 */
1227259412Sluigi	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1228259412Sluigi	if (brddst->bq_head != NM_FT_NULL) {
1229259412Sluigi		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1230259412Sluigi			uint16_t d_i;
1231259412Sluigi			i = b->bdg_port_index[j];
1232259412Sluigi			if (unlikely(i == me))
1233259412Sluigi				continue;
1234259412Sluigi			d_i = i * NM_BDG_MAXRINGS;
1235259412Sluigi			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1236259412Sluigi				dsts[num_dsts++] = d_i;
1237259412Sluigi		}
1238259412Sluigi	}
1239259412Sluigi
1240259412Sluigi	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1241259412Sluigi	/* second pass: scan destinations (XXX will be modular somehow) */
1242259412Sluigi	for (i = 0; i < num_dsts; i++) {
1243259412Sluigi		struct ifnet *dst_ifp;
1244259412Sluigi		struct netmap_vp_adapter *dst_na;
1245259412Sluigi		struct netmap_kring *kring;
1246259412Sluigi		struct netmap_ring *ring;
1247259412Sluigi		u_int dst_nr, lim, j, sent = 0, d_i, next, brd_next;
1248259412Sluigi		u_int needed, howmany;
1249259412Sluigi		int retry = netmap_txsync_retry;
1250259412Sluigi		struct nm_bdg_q *d;
1251259412Sluigi		uint32_t my_start = 0, lease_idx = 0;
1252259412Sluigi		int nrings;
1253259412Sluigi		int offset_mismatch;
1254259412Sluigi
1255259412Sluigi		d_i = dsts[i];
1256259412Sluigi		ND("second pass %d port %d", i, d_i);
1257259412Sluigi		d = dst_ents + d_i;
1258259412Sluigi		// XXX fix the division
1259259412Sluigi		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1260259412Sluigi		/* protect from the lookup function returning an inactive
1261259412Sluigi		 * destination port
1262259412Sluigi		 */
1263259412Sluigi		if (unlikely(dst_na == NULL))
1264259412Sluigi			goto cleanup;
1265259412Sluigi		if (dst_na->up.na_flags & NAF_SW_ONLY)
1266259412Sluigi			goto cleanup;
1267259412Sluigi		dst_ifp = dst_na->up.ifp;
1268259412Sluigi		/*
1269259412Sluigi		 * The interface may be in !netmap mode in two cases:
1270259412Sluigi		 * - when na is attached but not activated yet;
1271259412Sluigi		 * - when na is being deactivated but is still attached.
1272259412Sluigi		 */
1273259412Sluigi		if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
1274259412Sluigi			ND("not in netmap mode!");
1275259412Sluigi			goto cleanup;
1276259412Sluigi		}
1277259412Sluigi
1278259412Sluigi		offset_mismatch = (dst_na->offset != na->offset);
1279259412Sluigi
1280259412Sluigi		/* there is at least one either unicast or broadcast packet */
1281259412Sluigi		brd_next = brddst->bq_head;
1282259412Sluigi		next = d->bq_head;
1283259412Sluigi		/* we need to reserve this many slots. If fewer are
1284259412Sluigi		 * available, some packets will be dropped.
1285259412Sluigi		 * Packets may have multiple fragments, so we may not use
1286259412Sluigi		 * there is a chance that we may not use all of the slots
1287259412Sluigi		 * we have claimed, so we will need to handle the leftover
1288259412Sluigi		 * ones when we regain the lock.
1289259412Sluigi		 */
1290259412Sluigi		needed = d->bq_len + brddst->bq_len;
1291259412Sluigi
1292259412Sluigi		ND(5, "pass 2 dst %d is %x %s",
1293259412Sluigi			i, d_i, is_vp ? "virtual" : "nic/host");
1294259412Sluigi		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1295259412Sluigi		nrings = dst_na->up.num_rx_rings;
1296259412Sluigi		if (dst_nr >= nrings)
1297259412Sluigi			dst_nr = dst_nr % nrings;
1298259412Sluigi		kring = &dst_na->up.rx_rings[dst_nr];
1299259412Sluigi		ring = kring->ring;
1300259412Sluigi		lim = kring->nkr_num_slots - 1;
1301259412Sluigi
1302259412Sluigiretry:
1303259412Sluigi
1304259412Sluigi		/* reserve the buffers in the queue and an entry
1305259412Sluigi		 * to report completion, and drop lock.
1306259412Sluigi		 * XXX this might become a helper function.
1307259412Sluigi		 */
1308259412Sluigi		mtx_lock(&kring->q_lock);
1309259412Sluigi		if (kring->nkr_stopped) {
1310259412Sluigi			mtx_unlock(&kring->q_lock);
1311259412Sluigi			goto cleanup;
1312259412Sluigi		}
1313259412Sluigi		if (dst_na->retry) {
1314259412Sluigi			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1315259412Sluigi		}
1316259412Sluigi		my_start = j = kring->nkr_hwlease;
1317259412Sluigi		howmany = nm_kr_space(kring, 1);
1318259412Sluigi		if (needed < howmany)
1319259412Sluigi			howmany = needed;
1320259412Sluigi		lease_idx = nm_kr_lease(kring, howmany, 1);
1321259412Sluigi		mtx_unlock(&kring->q_lock);
1322259412Sluigi
1323259412Sluigi		/* only retry if we need more than available slots */
1324259412Sluigi		if (retry && needed <= howmany)
1325259412Sluigi			retry = 0;
1326259412Sluigi
1327259412Sluigi		/* copy to the destination queue */
1328259412Sluigi		while (howmany > 0) {
1329259412Sluigi			struct netmap_slot *slot;
1330259412Sluigi			struct nm_bdg_fwd *ft_p, *ft_end;
1331259412Sluigi			u_int cnt;
1332259412Sluigi			int fix_mismatch = offset_mismatch;
1333259412Sluigi
1334259412Sluigi			/* find the queue from which we pick next packet.
1335259412Sluigi			 * NM_FT_NULL is always higher than valid indexes
1336259412Sluigi			 * so we never dereference it if the other list
1337259412Sluigi			 * has packets (and if both are empty we never
1338259412Sluigi			 * get here).
1339259412Sluigi			 */
1340259412Sluigi			if (next < brd_next) {
1341259412Sluigi				ft_p = ft + next;
1342259412Sluigi				next = ft_p->ft_next;
1343259412Sluigi			} else { /* insert broadcast */
1344259412Sluigi				ft_p = ft + brd_next;
1345259412Sluigi				brd_next = ft_p->ft_next;
1346259412Sluigi			}
1347259412Sluigi			cnt = ft_p->ft_frags; // cnt > 0
1348259412Sluigi			if (unlikely(cnt > howmany))
1349259412Sluigi			    break; /* no more space */
1350259412Sluigi			howmany -= cnt;
1351259412Sluigi			if (netmap_verbose && cnt > 1)
1352259412Sluigi				RD(5, "rx %d frags to %d", cnt, j);
1353259412Sluigi			ft_end = ft_p + cnt;
1354259412Sluigi			do {
1355259412Sluigi			    char *dst, *src = ft_p->ft_buf;
1356259412Sluigi			    size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1357259412Sluigi
1358259412Sluigi			    slot = &ring->slot[j];
1359259412Sluigi			    dst = BDG_NMB(&dst_na->up, slot);
1360259412Sluigi
1361259412Sluigi			    if (unlikely(fix_mismatch)) {
1362259412Sluigi				if (na->offset > dst_na->offset) {
1363259412Sluigi					src += na->offset - dst_na->offset;
1364259412Sluigi					copy_len -= na->offset - dst_na->offset;
1365259412Sluigi					dst_len = copy_len;
1366259412Sluigi				} else {
1367259412Sluigi					bzero(dst, dst_na->offset - na->offset);
1368259412Sluigi					dst_len += dst_na->offset - na->offset;
1369259412Sluigi					dst += dst_na->offset - na->offset;
1370259412Sluigi				}
1371259412Sluigi				/* fix the first fragment only */
1372259412Sluigi				fix_mismatch = 0;
1373259412Sluigi				/* completely skip an header only fragment */
1374259412Sluigi				if (copy_len == 0) {
1375259412Sluigi					ft_p++;
1376259412Sluigi					continue;
1377259412Sluigi				}
1378259412Sluigi			    }
1379259412Sluigi			    /* round to a multiple of 64 */
1380259412Sluigi			    copy_len = (copy_len + 63) & ~63;
1381259412Sluigi
1382259412Sluigi			    ND("send %d %d bytes at %s:%d",
1383259412Sluigi				i, ft_p->ft_len, NM_IFPNAME(dst_ifp), j);
1384259412Sluigi			    if (ft_p->ft_flags & NS_INDIRECT) {
1385259412Sluigi				if (copyin(src, dst, copy_len)) {
1386259412Sluigi					// invalid user pointer, pretend len is 0
1387259412Sluigi					dst_len = 0;
1388259412Sluigi				}
1389259412Sluigi			    } else {
1390259412Sluigi				//memcpy(dst, src, copy_len);
1391259412Sluigi				pkt_copy(src, dst, (int)copy_len);
1392259412Sluigi			    }
1393259412Sluigi			    slot->len = dst_len;
1394259412Sluigi			    slot->flags = (cnt << 8)| NS_MOREFRAG;
1395259412Sluigi			    j = nm_next(j, lim);
1396259412Sluigi			    ft_p++;
1397259412Sluigi			    sent++;
1398259412Sluigi			} while (ft_p != ft_end);
1399259412Sluigi			slot->flags = (cnt << 8); /* clear flag on last entry */
1400259412Sluigi			/* are we done ? */
1401259412Sluigi			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1402259412Sluigi				break;
1403259412Sluigi		}
1404259412Sluigi		{
1405259412Sluigi		    /* current position */
1406259412Sluigi		    uint32_t *p = kring->nkr_leases; /* shorthand */
1407259412Sluigi		    uint32_t update_pos;
1408259412Sluigi		    int still_locked = 1;
1409259412Sluigi
1410259412Sluigi		    mtx_lock(&kring->q_lock);
1411259412Sluigi		    if (unlikely(howmany > 0)) {
1412259412Sluigi			/* not used all bufs. If i am the last one
1413259412Sluigi			 * i can recover the slots, otherwise must
1414259412Sluigi			 * fill them with 0 to mark empty packets.
1415259412Sluigi			 */
1416259412Sluigi			ND("leftover %d bufs", howmany);
1417259412Sluigi			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1418259412Sluigi			    /* yes i am the last one */
1419259412Sluigi			    ND("roll back nkr_hwlease to %d", j);
1420259412Sluigi			    kring->nkr_hwlease = j;
1421259412Sluigi			} else {
1422259412Sluigi			    while (howmany-- > 0) {
1423259412Sluigi				ring->slot[j].len = 0;
1424259412Sluigi				ring->slot[j].flags = 0;
1425259412Sluigi				j = nm_next(j, lim);
1426259412Sluigi			    }
1427259412Sluigi			}
1428259412Sluigi		    }
1429259412Sluigi		    p[lease_idx] = j; /* report I am done */
1430259412Sluigi
1431259412Sluigi		    update_pos = nm_kr_rxpos(kring);
1432259412Sluigi
1433259412Sluigi		    if (my_start == update_pos) {
1434259412Sluigi			/* all slots before my_start have been reported,
1435259412Sluigi			 * so scan subsequent leases to see if other ranges
1436259412Sluigi			 * have been completed, and to a selwakeup or txsync.
1437259412Sluigi		         */
1438259412Sluigi			while (lease_idx != kring->nkr_lease_idx &&
1439259412Sluigi				p[lease_idx] != NR_NOSLOT) {
1440259412Sluigi			    j = p[lease_idx];
1441259412Sluigi			    p[lease_idx] = NR_NOSLOT;
1442259412Sluigi			    lease_idx = nm_next(lease_idx, lim);
1443259412Sluigi			}
1444259412Sluigi			/* j is the new 'write' position. j != my_start
1445259412Sluigi			 * means there are new buffers to report
1446259412Sluigi			 */
1447259412Sluigi			if (likely(j != my_start)) {
1448259412Sluigi				uint32_t old_avail = kring->nr_hwavail;
1449259412Sluigi
1450259412Sluigi				kring->nr_hwavail = (j >= kring->nr_hwcur) ?
1451259412Sluigi					j - kring->nr_hwcur :
1452259412Sluigi					j + lim + 1 - kring->nr_hwcur;
1453259412Sluigi				if (kring->nr_hwavail < old_avail) {
1454259412Sluigi					D("avail shrink %d -> %d",
1455259412Sluigi						old_avail, kring->nr_hwavail);
1456259412Sluigi				}
1457259412Sluigi				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1458259412Sluigi				still_locked = 0;
1459259412Sluigi				mtx_unlock(&kring->q_lock);
1460259412Sluigi				if (dst_na->retry && retry--)
1461259412Sluigi					goto retry;
1462259412Sluigi			}
1463259412Sluigi		    }
1464259412Sluigi		    if (still_locked)
1465259412Sluigi			mtx_unlock(&kring->q_lock);
1466259412Sluigi		}
1467259412Sluigicleanup:
1468259412Sluigi		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1469259412Sluigi		d->bq_len = 0;
1470259412Sluigi	}
1471259412Sluigi	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1472259412Sluigi	brddst->bq_len = 0;
1473259412Sluigi	return 0;
1474259412Sluigi}
1475259412Sluigi
1476259412Sluigistatic int
1477259412Sluiginetmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
1478259412Sluigi{
1479259412Sluigi	struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
1480259412Sluigi	struct netmap_ring *ring = kring->ring;
1481259412Sluigi	u_int j, k, lim = kring->nkr_num_slots - 1;
1482259412Sluigi
1483259412Sluigi	k = ring->cur;
1484259412Sluigi	if (k > lim)
1485259412Sluigi		return netmap_ring_reinit(kring);
1486259412Sluigi
1487259412Sluigi	if (bridge_batch <= 0) { /* testing only */
1488259412Sluigi		j = k; // used all
1489259412Sluigi		goto done;
1490259412Sluigi	}
1491259412Sluigi	if (bridge_batch > NM_BDG_BATCH)
1492259412Sluigi		bridge_batch = NM_BDG_BATCH;
1493259412Sluigi
1494259412Sluigi	j = nm_bdg_preflush(na, ring_nr, kring, k);
1495259412Sluigi	if (j != k)
1496259412Sluigi		D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
1497259412Sluigi	/* k-j modulo ring size is the number of slots processed */
1498259412Sluigi	if (k < j)
1499259412Sluigi		k += kring->nkr_num_slots;
1500259412Sluigi	kring->nr_hwavail = lim - (k - j);
1501259412Sluigi
1502259412Sluigidone:
1503259412Sluigi	kring->nr_hwcur = j;
1504259412Sluigi	ring->avail = kring->nr_hwavail;
1505259412Sluigi	if (netmap_verbose)
1506259412Sluigi		D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
1507259412Sluigi	return 0;
1508259412Sluigi}
1509259412Sluigi
1510259412Sluigi
1511259412Sluigi/*
1512259412Sluigi * main dispatch routine for the bridge.
1513259412Sluigi * We already know that only one thread is running this.
1514259412Sluigi * we must run nm_bdg_preflush without lock.
1515259412Sluigi */
1516259412Sluigistatic int
1517259412Sluigibdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1518259412Sluigi{
1519259412Sluigi	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
1520259412Sluigi	return netmap_vp_txsync(vpna, ring_nr, flags);
1521259412Sluigi}
1522259412Sluigi
1523259412Sluigi
1524259412Sluigi/*
1525259412Sluigi * user process reading from a VALE switch.
1526259412Sluigi * Already protected against concurrent calls from userspace,
1527259412Sluigi * but we must acquire the queue's lock to protect against
1528259412Sluigi * writers on the same queue.
1529259412Sluigi */
1530259412Sluigistatic int
1531259412Sluigibdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1532259412Sluigi{
1533259412Sluigi	struct netmap_kring *kring = &na->rx_rings[ring_nr];
1534259412Sluigi	struct netmap_ring *ring = kring->ring;
1535259412Sluigi	u_int j, lim = kring->nkr_num_slots - 1;
1536259412Sluigi	u_int k = ring->cur, resvd = ring->reserved;
1537259412Sluigi	int n;
1538259412Sluigi
1539259412Sluigi	mtx_lock(&kring->q_lock);
1540259412Sluigi	if (k > lim) {
1541259412Sluigi		D("ouch dangerous reset!!!");
1542259412Sluigi		n = netmap_ring_reinit(kring);
1543259412Sluigi		goto done;
1544259412Sluigi	}
1545259412Sluigi
1546259412Sluigi	/* skip past packets that userspace has released */
1547259412Sluigi	j = kring->nr_hwcur;    /* netmap ring index */
1548259412Sluigi	if (resvd > 0) {
1549259412Sluigi		if (resvd + ring->avail >= lim + 1) {
1550259412Sluigi			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
1551259412Sluigi			ring->reserved = resvd = 0; // XXX panic...
1552259412Sluigi		}
1553259412Sluigi		k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
1554259412Sluigi	}
1555259412Sluigi
1556259412Sluigi	if (j != k) { /* userspace has released some packets. */
1557259412Sluigi		n = k - j;
1558259412Sluigi		if (n < 0)
1559259412Sluigi			n += kring->nkr_num_slots;
1560259412Sluigi		ND("userspace releases %d packets", n);
1561259412Sluigi		for (n = 0; likely(j != k); n++) {
1562259412Sluigi			struct netmap_slot *slot = &ring->slot[j];
1563259412Sluigi			void *addr = BDG_NMB(na, slot);
1564259412Sluigi
1565259412Sluigi			if (addr == netmap_buffer_base) { /* bad buf */
1566259412Sluigi				D("bad buffer index %d, ignore ?",
1567259412Sluigi					slot->buf_idx);
1568259412Sluigi			}
1569259412Sluigi			slot->flags &= ~NS_BUF_CHANGED;
1570259412Sluigi			j = nm_next(j, lim);
1571259412Sluigi		}
1572259412Sluigi		kring->nr_hwavail -= n;
1573259412Sluigi		kring->nr_hwcur = k;
1574259412Sluigi	}
1575259412Sluigi	/* tell userspace that there are new packets */
1576259412Sluigi	ring->avail = kring->nr_hwavail - resvd;
1577259412Sluigi	n = 0;
1578259412Sluigidone:
1579259412Sluigi	mtx_unlock(&kring->q_lock);
1580259412Sluigi	return n;
1581259412Sluigi}
1582259412Sluigi
1583259412Sluigistatic int
1584259412Sluigibdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp)
1585259412Sluigi{
1586259412Sluigi	struct netmap_vp_adapter *vpna;
1587259412Sluigi	struct netmap_adapter *na;
1588259412Sluigi	int error;
1589259412Sluigi
1590259412Sluigi	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1591259412Sluigi	if (vpna == NULL)
1592259412Sluigi		return ENOMEM;
1593259412Sluigi
1594259412Sluigi 	na = &vpna->up;
1595259412Sluigi
1596259412Sluigi	na->ifp = ifp;
1597259412Sluigi
1598259412Sluigi	/* bound checking */
1599259412Sluigi	na->num_tx_rings = nmr->nr_tx_rings;
1600259412Sluigi	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1601259412Sluigi	nmr->nr_tx_rings = na->num_tx_rings; // write back
1602259412Sluigi	na->num_rx_rings = nmr->nr_rx_rings;
1603259412Sluigi	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1604259412Sluigi	nmr->nr_rx_rings = na->num_rx_rings; // write back
1605259412Sluigi	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1606259412Sluigi			1, NM_BDG_MAXSLOTS, NULL);
1607259412Sluigi	na->num_tx_desc = nmr->nr_tx_slots;
1608259412Sluigi	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1609259412Sluigi			1, NM_BDG_MAXSLOTS, NULL);
1610259412Sluigi	na->num_rx_desc = nmr->nr_rx_slots;
1611259412Sluigi	vpna->offset = 0;
1612259412Sluigi
1613259412Sluigi	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
1614259412Sluigi	na->nm_txsync = bdg_netmap_txsync;
1615259412Sluigi	na->nm_rxsync = bdg_netmap_rxsync;
1616259412Sluigi	na->nm_register = bdg_netmap_reg;
1617259412Sluigi	na->nm_dtor = netmap_adapter_vp_dtor;
1618259412Sluigi	na->nm_krings_create = netmap_vp_krings_create;
1619259412Sluigi	na->nm_krings_delete = netmap_vp_krings_delete;
1620259412Sluigi	na->nm_mem = netmap_mem_private_new(NM_IFPNAME(na->ifp),
1621259412Sluigi			na->num_tx_rings, na->num_tx_desc,
1622259412Sluigi			na->num_rx_rings, na->num_rx_desc);
1623259412Sluigi	/* other nmd fields are set in the common routine */
1624259412Sluigi	error = netmap_attach_common(na);
1625259412Sluigi	if (error) {
1626259412Sluigi		free(vpna, M_DEVBUF);
1627259412Sluigi		return error;
1628259412Sluigi	}
1629259412Sluigi	return 0;
1630259412Sluigi}
1631259412Sluigi
1632259412Sluigistatic void
1633259412Sluiginetmap_bwrap_dtor(struct netmap_adapter *na)
1634259412Sluigi{
1635259412Sluigi	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1636259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
1637259412Sluigi	struct nm_bridge *b = bna->up.na_bdg,
1638259412Sluigi		*bh = bna->host.na_bdg;
1639259412Sluigi	struct ifnet *ifp = na->ifp;
1640259412Sluigi
1641259412Sluigi	ND("na %p", na);
1642259412Sluigi
1643259412Sluigi	if (b) {
1644259412Sluigi		netmap_bdg_detach_common(b, bna->up.bdg_port,
1645259412Sluigi			(bh ? bna->host.bdg_port : -1));
1646259412Sluigi	}
1647259412Sluigi
1648259412Sluigi	hwna->na_private = NULL;
1649259412Sluigi	netmap_adapter_put(hwna);
1650259412Sluigi
1651259412Sluigi	bzero(ifp, sizeof(*ifp));
1652259412Sluigi	free(ifp, M_DEVBUF);
1653259412Sluigi	na->ifp = NULL;
1654259412Sluigi
1655259412Sluigi}
1656259412Sluigi
1657259412Sluigi/*
1658259412Sluigi * Pass packets from nic to the bridge.
1659259412Sluigi * XXX TODO check locking: this is called from the interrupt
1660259412Sluigi * handler so we should make sure that the interface is not
1661259412Sluigi * disconnected while passing down an interrupt.
1662259412Sluigi *
1663259412Sluigi * Note, no user process can access this NIC so we can ignore
1664259412Sluigi * the info in the 'ring'.
1665259412Sluigi */
1666259412Sluigi/* callback that overwrites the hwna notify callback.
1667259412Sluigi * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1668259412Sluigi * The bridge wrapper then sends the packets through the bridge.
1669259412Sluigi */
1670259412Sluigistatic int
1671259412Sluiginetmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
1672259412Sluigi{
1673259412Sluigi	struct ifnet *ifp = na->ifp;
1674259412Sluigi	struct netmap_bwrap_adapter *bna = na->na_private;
1675259412Sluigi	struct netmap_vp_adapter *hostna = &bna->host;
1676259412Sluigi	struct netmap_kring *kring, *bkring;
1677259412Sluigi	struct netmap_ring *ring;
1678259412Sluigi	int is_host_ring = ring_nr == na->num_rx_rings;
1679259412Sluigi	struct netmap_vp_adapter *vpna = &bna->up;
1680259412Sluigi	int error = 0;
1681259412Sluigi
1682259412Sluigi	ND("%s[%d] %s %x", NM_IFPNAME(ifp), ring_nr, (tx == NR_TX ? "TX" : "RX"), flags);
1683259412Sluigi
1684259412Sluigi	if (flags & NAF_DISABLE_NOTIFY) {
1685259412Sluigi		kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
1686259412Sluigi		bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
1687259412Sluigi		if (kring->nkr_stopped)
1688259412Sluigi			netmap_disable_ring(bkring);
1689259412Sluigi		else
1690259412Sluigi			bkring->nkr_stopped = 0;
1691259412Sluigi		return 0;
1692259412Sluigi	}
1693259412Sluigi
1694259412Sluigi	if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
1695259412Sluigi		return 0;
1696259412Sluigi
1697259412Sluigi	if (tx == NR_TX)
1698259412Sluigi		return 0;
1699259412Sluigi
1700259412Sluigi	kring = &na->rx_rings[ring_nr];
1701259412Sluigi	ring = kring->ring;
1702259412Sluigi
1703259412Sluigi	/* make sure the ring is not disabled */
1704259412Sluigi	if (nm_kr_tryget(kring))
1705259412Sluigi		return 0;
1706259412Sluigi
1707259412Sluigi	if (is_host_ring && hostna->na_bdg == NULL) {
1708259412Sluigi		error = bna->save_notify(na, ring_nr, tx, flags);
1709259412Sluigi		goto put_out;
1710259412Sluigi	}
1711259412Sluigi
1712259412Sluigi	if (is_host_ring) {
1713259412Sluigi		vpna = hostna;
1714259412Sluigi		ring_nr = 0;
1715259412Sluigi	} else {
1716259412Sluigi		/* fetch packets that have arrived.
1717259412Sluigi		 * XXX maybe do this in a loop ?
1718259412Sluigi		 */
1719259412Sluigi		error = na->nm_rxsync(na, ring_nr, 0);
1720259412Sluigi		if (error)
1721259412Sluigi			goto put_out;
1722259412Sluigi	}
1723259412Sluigi	if (kring->nr_hwavail == 0 && netmap_verbose) {
1724259412Sluigi		D("how strange, interrupt with no packets on %s",
1725259412Sluigi			NM_IFPNAME(ifp));
1726259412Sluigi		goto put_out;
1727259412Sluigi	}
1728259412Sluigi	/* XXX avail ? */
1729259412Sluigi	ring->cur = nm_kr_rxpos(kring);
1730259412Sluigi	netmap_vp_txsync(vpna, ring_nr, flags);
1731259412Sluigi
1732259412Sluigi	if (!is_host_ring)
1733259412Sluigi		error = na->nm_rxsync(na, ring_nr, 0);
1734259412Sluigi
1735259412Sluigiput_out:
1736259412Sluigi	nm_kr_put(kring);
1737259412Sluigi	return error;
1738259412Sluigi}
1739259412Sluigi
1740259412Sluigistatic int
1741259412Sluiginetmap_bwrap_register(struct netmap_adapter *na, int onoff)
1742259412Sluigi{
1743259412Sluigi	struct netmap_bwrap_adapter *bna =
1744259412Sluigi		(struct netmap_bwrap_adapter *)na;
1745259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
1746259412Sluigi	struct netmap_vp_adapter *hostna = &bna->host;
1747259412Sluigi	int error;
1748259412Sluigi
1749259412Sluigi	ND("%s %d", NM_IFPNAME(ifp), onoff);
1750259412Sluigi
1751259412Sluigi	if (onoff) {
1752259412Sluigi		int i;
1753259412Sluigi
1754259412Sluigi		hwna->na_lut = na->na_lut;
1755259412Sluigi		hwna->na_lut_objtotal = na->na_lut_objtotal;
1756259412Sluigi
1757259412Sluigi		if (hostna->na_bdg) {
1758259412Sluigi			hostna->up.na_lut = na->na_lut;
1759259412Sluigi			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
1760259412Sluigi		}
1761259412Sluigi
1762259412Sluigi		/* cross-link the netmap rings */
1763259412Sluigi		for (i = 0; i <= na->num_tx_rings; i++) {
1764259412Sluigi			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
1765259412Sluigi			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
1766259412Sluigi		}
1767259412Sluigi		for (i = 0; i <= na->num_rx_rings; i++) {
1768259412Sluigi			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
1769259412Sluigi			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
1770259412Sluigi		}
1771259412Sluigi	}
1772259412Sluigi
1773259412Sluigi	if (hwna->ifp) {
1774259412Sluigi		error = hwna->nm_register(hwna, onoff);
1775259412Sluigi		if (error)
1776259412Sluigi			return error;
1777259412Sluigi	}
1778259412Sluigi
1779259412Sluigi	bdg_netmap_reg(na, onoff);
1780259412Sluigi
1781259412Sluigi	if (onoff) {
1782259412Sluigi		bna->save_notify = hwna->nm_notify;
1783259412Sluigi		hwna->nm_notify = netmap_bwrap_intr_notify;
1784259412Sluigi	} else {
1785259412Sluigi		hwna->nm_notify = bna->save_notify;
1786259412Sluigi		hwna->na_lut = NULL;
1787259412Sluigi		hwna->na_lut_objtotal = 0;
1788259412Sluigi	}
1789259412Sluigi
1790259412Sluigi	return 0;
1791259412Sluigi}
1792259412Sluigi
1793259412Sluigistatic int
1794259412Sluiginetmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
1795259412Sluigi				    u_int *rxr, u_int *rxd)
1796259412Sluigi{
1797259412Sluigi	struct netmap_bwrap_adapter *bna =
1798259412Sluigi		(struct netmap_bwrap_adapter *)na;
1799259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
1800259412Sluigi
1801259412Sluigi	/* forward the request */
1802259412Sluigi	netmap_update_config(hwna);
1803259412Sluigi	/* swap the results */
1804259412Sluigi	*txr = hwna->num_rx_rings;
1805259412Sluigi	*txd = hwna->num_rx_desc;
1806259412Sluigi	*rxr = hwna->num_tx_rings;
1807259412Sluigi	*rxd = hwna->num_rx_desc;
1808259412Sluigi
1809259412Sluigi	return 0;
1810259412Sluigi}
1811259412Sluigi
1812259412Sluigistatic int
1813259412Sluiginetmap_bwrap_krings_create(struct netmap_adapter *na)
1814259412Sluigi{
1815259412Sluigi	struct netmap_bwrap_adapter *bna =
1816259412Sluigi		(struct netmap_bwrap_adapter *)na;
1817259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
1818259412Sluigi	struct netmap_adapter *hostna = &bna->host.up;
1819259412Sluigi	int error;
1820259412Sluigi
1821259412Sluigi	ND("%s", NM_IFPNAME(na->ifp));
1822259412Sluigi
1823259412Sluigi	error = netmap_vp_krings_create(na);
1824259412Sluigi	if (error)
1825259412Sluigi		return error;
1826259412Sluigi
1827259412Sluigi	error = hwna->nm_krings_create(hwna);
1828259412Sluigi	if (error) {
1829259412Sluigi		netmap_vp_krings_delete(na);
1830259412Sluigi		return error;
1831259412Sluigi	}
1832259412Sluigi
1833259412Sluigi	hostna->tx_rings = na->tx_rings + na->num_tx_rings;
1834259412Sluigi	hostna->rx_rings = na->rx_rings + na->num_rx_rings;
1835259412Sluigi
1836259412Sluigi	return 0;
1837259412Sluigi}
1838259412Sluigi
1839259412Sluigistatic void
1840259412Sluiginetmap_bwrap_krings_delete(struct netmap_adapter *na)
1841259412Sluigi{
1842259412Sluigi	struct netmap_bwrap_adapter *bna =
1843259412Sluigi		(struct netmap_bwrap_adapter *)na;
1844259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
1845259412Sluigi
1846259412Sluigi	ND("%s", NM_IFPNAME(na->ifp));
1847259412Sluigi
1848259412Sluigi	hwna->nm_krings_delete(hwna);
1849259412Sluigi	netmap_vp_krings_delete(na);
1850259412Sluigi}
1851259412Sluigi
1852259412Sluigi/* notify method for the bridge-->hwna direction */
1853259412Sluigistatic int
1854259412Sluiginetmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1855259412Sluigi{
1856259412Sluigi	struct netmap_bwrap_adapter *bna =
1857259412Sluigi		(struct netmap_bwrap_adapter *)na;
1858259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
1859259412Sluigi	struct netmap_kring *kring, *hw_kring;
1860259412Sluigi	struct netmap_ring *ring;
1861259412Sluigi	u_int lim, k;
1862259412Sluigi	int error = 0;
1863259412Sluigi
1864259412Sluigi	if (tx == NR_TX)
1865259412Sluigi	        return ENXIO;
1866259412Sluigi
1867259412Sluigi	kring = &na->rx_rings[ring_n];
1868259412Sluigi	hw_kring = &hwna->tx_rings[ring_n];
1869259412Sluigi	ring = kring->ring;
1870259412Sluigi
1871259412Sluigi	lim = kring->nkr_num_slots - 1;
1872259412Sluigi	k = nm_kr_rxpos(kring);
1873259412Sluigi
1874259412Sluigi	if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
1875259412Sluigi		return 0;
1876259412Sluigi	ring->cur = k;
1877259412Sluigi	ND("%s[%d] PRE rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
1878259412Sluigi		NM_IFPNAME(na->ifp), ring_n,
1879259412Sluigi		kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
1880259412Sluigi		ring->cur, ring->avail, ring->reserved,
1881259412Sluigi		hw_kring->nr_hwcur, hw_kring->nr_hwavail);
1882259412Sluigi	if (ring_n == na->num_rx_rings) {
1883259412Sluigi		netmap_txsync_to_host(hwna);
1884259412Sluigi	} else {
1885259412Sluigi		error = hwna->nm_txsync(hwna, ring_n, flags);
1886259412Sluigi	}
1887259412Sluigi	kring->nr_hwcur = ring->cur;
1888259412Sluigi	kring->nr_hwavail = 0;
1889259412Sluigi	kring->nr_hwreserved = lim - ring->avail;
1890259412Sluigi	ND("%s[%d] PST rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
1891259412Sluigi		NM_IFPNAME(na->ifp), ring_n,
1892259412Sluigi		kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
1893259412Sluigi		ring->cur, ring->avail, ring->reserved,
1894259412Sluigi		hw_kring->nr_hwcur, hw_kring->nr_hwavail);
1895259412Sluigi
1896259412Sluigi	return error;
1897259412Sluigi}
1898259412Sluigi
1899259412Sluigistatic int
1900259412Sluiginetmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1901259412Sluigi{
1902259412Sluigi	struct netmap_bwrap_adapter *bna = na->na_private;
1903259412Sluigi	struct netmap_adapter *port_na = &bna->up.up;
1904259412Sluigi	if (tx == NR_TX || ring_n != 0)
1905259412Sluigi		return ENXIO;
1906259412Sluigi	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
1907259412Sluigi}
1908259412Sluigi
1909259412Sluigi/* attach a bridge wrapper to the 'real' device */
1910259412Sluigistatic int
1911259412Sluiginetmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
1912259412Sluigi{
1913259412Sluigi	struct netmap_bwrap_adapter *bna;
1914259412Sluigi	struct netmap_adapter *na;
1915259412Sluigi	struct netmap_adapter *hwna = NA(real);
1916259412Sluigi	struct netmap_adapter *hostna;
1917259412Sluigi	int error;
1918259412Sluigi
1919259412Sluigi
1920259412Sluigi	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
1921259412Sluigi	if (bna == NULL)
1922259412Sluigi		return ENOMEM;
1923259412Sluigi
1924259412Sluigi	na = &bna->up.up;
1925259412Sluigi	na->ifp = fake;
1926259412Sluigi	/* fill the ring data for the bwrap adapter with rx/tx meanings
1927259412Sluigi	 * swapped. The real cross-linking will be done during register,
1928259412Sluigi	 * when all the krings will have been created.
1929259412Sluigi	 */
1930259412Sluigi	na->num_rx_rings = hwna->num_tx_rings;
1931259412Sluigi	na->num_tx_rings = hwna->num_rx_rings;
1932259412Sluigi	na->num_tx_desc = hwna->num_rx_desc;
1933259412Sluigi	na->num_rx_desc = hwna->num_tx_desc;
1934259412Sluigi	na->nm_dtor = netmap_bwrap_dtor;
1935259412Sluigi	na->nm_register = netmap_bwrap_register;
1936259412Sluigi	// na->nm_txsync = netmap_bwrap_txsync;
1937259412Sluigi	// na->nm_rxsync = netmap_bwrap_rxsync;
1938259412Sluigi	na->nm_config = netmap_bwrap_config;
1939259412Sluigi	na->nm_krings_create = netmap_bwrap_krings_create;
1940259412Sluigi	na->nm_krings_delete = netmap_bwrap_krings_delete;
1941259412Sluigi	na->nm_notify = netmap_bwrap_notify;
1942259412Sluigi	na->nm_mem = hwna->nm_mem;
1943259412Sluigi	na->na_private = na; /* prevent NIOCREGIF */
1944259412Sluigi	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
1945259412Sluigi
1946259412Sluigi	bna->hwna = hwna;
1947259412Sluigi	netmap_adapter_get(hwna);
1948259412Sluigi	hwna->na_private = bna; /* weak reference */
1949259412Sluigi
1950259412Sluigi	hostna = &bna->host.up;
1951259412Sluigi	hostna->ifp = hwna->ifp;
1952259412Sluigi	hostna->num_tx_rings = 1;
1953259412Sluigi	hostna->num_tx_desc = hwna->num_rx_desc;
1954259412Sluigi	hostna->num_rx_rings = 1;
1955259412Sluigi	hostna->num_rx_desc = hwna->num_tx_desc;
1956259412Sluigi	// hostna->nm_txsync = netmap_bwrap_host_txsync;
1957259412Sluigi	// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
1958259412Sluigi	hostna->nm_notify = netmap_bwrap_host_notify;
1959259412Sluigi	hostna->nm_mem = na->nm_mem;
1960259412Sluigi	hostna->na_private = bna;
1961259412Sluigi
1962259412Sluigi	D("%s<->%s txr %d txd %d rxr %d rxd %d", fake->if_xname, real->if_xname,
1963259412Sluigi		na->num_tx_rings, na->num_tx_desc,
1964259412Sluigi		na->num_rx_rings, na->num_rx_desc);
1965259412Sluigi
1966259412Sluigi	error = netmap_attach_common(na);
1967259412Sluigi	if (error) {
1968259412Sluigi		netmap_adapter_put(hwna);
1969259412Sluigi		free(bna, M_DEVBUF);
1970259412Sluigi		return error;
1971259412Sluigi	}
1972259412Sluigi	return 0;
1973259412Sluigi}
1974259412Sluigi
1975259412Sluigivoid
1976259412Sluiginetmap_init_bridges(void)
1977259412Sluigi{
1978259412Sluigi	int i;
1979259412Sluigi	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
1980259412Sluigi	for (i = 0; i < NM_BRIDGES; i++)
1981259412Sluigi		BDG_RWINIT(&nm_bridges[i]);
1982259412Sluigi}
1983259412Sluigi#endif /* WITH_VALE */
1984