netmap_vale.c revision 331722
1331722Seadler/*
2260368Sluigi * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
3259412Sluigi *
4259412Sluigi * Redistribution and use in source and binary forms, with or without
5259412Sluigi * modification, are permitted provided that the following conditions
6259412Sluigi * are met:
7259412Sluigi *   1. Redistributions of source code must retain the above copyright
8259412Sluigi *      notice, this list of conditions and the following disclaimer.
9259412Sluigi *   2. Redistributions in binary form must reproduce the above copyright
10259412Sluigi *      notice, this list of conditions and the following disclaimer in the
11259412Sluigi *      documentation and/or other materials provided with the distribution.
12259412Sluigi *
13259412Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14259412Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15259412Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16259412Sluigi * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17259412Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18259412Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19259412Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20259412Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21259412Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22259412Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23259412Sluigi * SUCH DAMAGE.
24259412Sluigi */
25259412Sluigi
26259412Sluigi
27259412Sluigi/*
28259412Sluigi * This module implements the VALE switch for netmap
29259412Sluigi
30259412Sluigi--- VALE SWITCH ---
31259412Sluigi
32259412SluigiNMG_LOCK() serializes all modifications to switches and ports.
33259412SluigiA switch cannot be deleted until all ports are gone.
34259412Sluigi
35259412SluigiFor each switch, an SX lock (RWlock on linux) protects
36259412Sluigideletion of ports. When configuring or deleting a new port, the
37259412Sluigilock is acquired in exclusive mode (after holding NMG_LOCK).
38259412SluigiWhen forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.
42259412Sluigi
On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch.)
49259412Sluigi
50259412Sluigi */
51259412Sluigi
52259412Sluigi/*
53259412Sluigi * OS-specific code that is used only within this file.
54259412Sluigi * Other OS-specific code that must be accessed by drivers
55259412Sluigi * is present in netmap_kern.h
56259412Sluigi */
57259412Sluigi
58259412Sluigi#if defined(__FreeBSD__)
59259412Sluigi#include <sys/cdefs.h> /* prerequisite */
60259412Sluigi__FBSDID("$FreeBSD: stable/11/sys/dev/netmap/netmap_vale.c 331722 2018-03-29 02:50:57Z eadler $");
61259412Sluigi
62259412Sluigi#include <sys/types.h>
63259412Sluigi#include <sys/errno.h>
64259412Sluigi#include <sys/param.h>	/* defines used in kernel.h */
65259412Sluigi#include <sys/kernel.h>	/* types used in module initialization */
66259412Sluigi#include <sys/conf.h>	/* cdevsw struct, UID, GID */
67259412Sluigi#include <sys/sockio.h>
68259412Sluigi#include <sys/socketvar.h>	/* struct socket */
69259412Sluigi#include <sys/malloc.h>
70259412Sluigi#include <sys/poll.h>
71259412Sluigi#include <sys/rwlock.h>
72259412Sluigi#include <sys/socket.h> /* sockaddrs */
73259412Sluigi#include <sys/selinfo.h>
74259412Sluigi#include <sys/sysctl.h>
75259412Sluigi#include <net/if.h>
76259412Sluigi#include <net/if_var.h>
77259412Sluigi#include <net/bpf.h>		/* BIOCIMMEDIATE */
78259412Sluigi#include <machine/bus.h>	/* bus_dmamap_* */
79259412Sluigi#include <sys/endian.h>
80259412Sluigi#include <sys/refcount.h>
81259412Sluigi
82259412Sluigi
/*
 * FreeBSD mapping of the per-bridge lock primitives.
 * Every macro takes a pointer to an object that has a bdg_lock member
 * and forwards to the corresponding rw_*() call on that member.
 * NOTE(review): the module comment talks about an SX lock, while these
 * macros use a struct rwlock -- confirm which one is intended.
 */
#define BDG_RWLOCK_T		struct rwlock /* type of the bdg_lock member */

/* initialize the lock; RW_NOWITNESS is passed to rw_init_flags() */
#define	BDG_RWINIT(b)		\
	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
#define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)	/* exclusive acquire */
#define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)	/* exclusive release */
#define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)	/* shared acquire */
#define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)	/* shared, non-blocking attempt */
#define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)	/* shared release */
#define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)
93259412Sluigi
94259412Sluigi
95259412Sluigi#elif defined(linux)
96259412Sluigi
97259412Sluigi#include "bsd_glue.h"
98259412Sluigi
99259412Sluigi#elif defined(__APPLE__)
100259412Sluigi
101259412Sluigi#warning OSX support is only partial
102259412Sluigi#include "osx_glue.h"
103259412Sluigi
104259412Sluigi#else
105259412Sluigi
106259412Sluigi#error	Unsupported platform
107259412Sluigi
108259412Sluigi#endif /* unsupported */
109259412Sluigi
110259412Sluigi/*
111259412Sluigi * common headers
112259412Sluigi */
113259412Sluigi
114259412Sluigi#include <net/netmap.h>
115259412Sluigi#include <dev/netmap/netmap_kern.h>
116259412Sluigi#include <dev/netmap/netmap_mem2.h>
117259412Sluigi
118259412Sluigi#ifdef WITH_VALE
119259412Sluigi
/*
 * system parameters (most of them in netmap_kern.h)
 * NM_NAME	prefix for switch port names, default "vale"
 * NM_BDG_MAXPORTS	number of ports
 * NM_BRIDGES	max number of switches in the system.
 *	XXX should become a sysctl or tunable
 *
 * Switch ports are named valeX:Y where X is the switch name and Y
 * is the port. If Y matches a physical interface name, the port is
 * connected to a physical device.
 *
 * Unlike physical interfaces, switch ports use their own memory region
 * for rings and buffers.
 * The virtual interfaces use per-queue lock instead of core lock.
 * In the tx loop, we aggregate traffic in batches to make all operations
 * faster. The batch size is bridge_batch.
 */
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
#define NM_BDG_HASH		1024	/* forwarding table entries */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */
/*
 * Actual size of the tables: a full batch plus room for one
 * maximum-length chain of buffers.
 */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
/*
 * NM_FT_NULL terminates a list of slots in the ft. Its value equals the
 * table size, so it never matches a valid index (0..NM_BDG_BATCH_MAX-1).
 */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
#define	NM_BRIDGES		8	/* number of bridges */
148259412Sluigi
149259412Sluigi
150259412Sluigi/*
151259412Sluigi * bridge_batch is set via sysctl to the max batch size to be
152259412Sluigi * used in the bridge. The actual value may be larger as the
153259412Sluigi * last packet in the block may overflow the size.
154259412Sluigi */
155259412Sluigiint bridge_batch = NM_BDG_BATCH; /* bridge batch size */
156259412SluigiSYSCTL_DECL(_dev_netmap);
157259412SluigiSYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
158259412Sluigi
159259412Sluigi
/*
 * Prototypes of file-local (static) functions that are referenced
 * before their definition.
 */
static int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **);
static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
163259412Sluigi
/*
 * For each output interface, nm_bdg_q is used to construct a list.
 * bq_len is the number of output buffers (we can have coalescing
 * during the copy).
 *
 * An empty queue has bq_head == bq_tail == NM_FT_NULL and bq_len == 0;
 * that is how the queues are set up when the forwarding table is
 * allocated.
 */
struct nm_bdg_q {
	uint16_t bq_head;	/* first slot of the list, or NM_FT_NULL */
	uint16_t bq_tail;	/* last slot of the list, or NM_FT_NULL */
	uint32_t bq_len;	/* number of buffers */
};
174259412Sluigi
/*
 * One entry of the per-bridge forwarding (MAC learning) table.
 * XXX revise this
 */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;	/* NOTE(review): presumably the port(s) the MAC was seen on -- confirm */
};
180259412Sluigi
/*
 * nm_bridge is a descriptor for a VALE switch.
 * Interfaces for a bridge are all in bdg_ports[].
 * The array has fixed size, an empty entry does not terminate
 * the search, but lookups only occur on attach/detach so we
 * don't mind if they are slow.
 *
 * The bridge is non blocking on the transmit ports: excess
 * packets are dropped if there is no room on the output port.
 *
 * bdg_lock protects accesses to the bdg_ports array.
 * This is a rw lock (or equivalent).
 */
struct nm_bridge {
	/* XXX what is the proper alignment/layout ? */
	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
	int		bdg_namelen;	/* number of significant chars in bdg_basename */
	uint32_t	bdg_active_ports; /* 0 means free */
	char		bdg_basename[IFNAMSIZ];	/* switch name (prefix of the port names) */

	/* Indexes of active ports (up to active_ports)
	 * and all other remaining ports.
	 * The first bdg_active_ports entries index the ports in use in
	 * bdg_ports[]; the following entries are the indexes still available.
	 */
	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];

	/* adapters bound to the switch, indexed by port number */
	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];


	/*
	 * Callbacks of the switch. The lookup callback is the function
	 * that decides the destination port.
	 * It returns either of an index of the destination port,
	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
	 * forward this packet.  ring_nr is the source ring index, and the
	 * function may overwrite this value to forward this packet to a
	 * different ring index.
	 * This function must be set by netmap_bdgctl().
	 * The dtor callback, when set, is invoked on a port that is being
	 * detached from the switch.
	 */
	struct netmap_bdg_ops bdg_ops;

	/* the forwarding table, MAC+ports.
	 * XXX should be changed to an argument to be passed to
	 * the lookup function, and allocated on attach
	 */
	struct nm_hash_ent ht[NM_BDG_HASH];

#ifdef CONFIG_NET_NS
	struct net *ns;	/* network namespace; only present with CONFIG_NET_NS */
#endif /* CONFIG_NET_NS */
};
230259412Sluigi
231270063Sluigiconst char*
232270063Sluiginetmap_bdg_name(struct netmap_vp_adapter *vp)
233270063Sluigi{
234270063Sluigi	struct nm_bridge *b = vp->na_bdg;
235270063Sluigi	if (b == NULL)
236270063Sluigi		return NULL;
237270063Sluigi	return b->bdg_basename;
238270063Sluigi}
239259412Sluigi
240270063Sluigi
#ifndef CONFIG_NET_NS
/*
 * Table of the switches in the system, used when network namespaces
 * (CONFIG_NET_NS) are not configured.
 *
 * XXX in principle nm_bridges could be created dynamically
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 */
struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */
249259412Sluigi
250259412Sluigi
/*
 * Slightly optimized packet copy.
 *
 * Lengths below 1024 are copied in 64-byte chunks (eight 64-bit words
 * per iteration), so the amount actually copied is l rounded up to a
 * multiple of 64; the caller must guarantee that both buffers have
 * room for that and do not overlap. Lengths of 1024 or more are
 * handed to memcpy() and copied exactly. Nothing is copied for l <= 0.
 *
 * _src: source buffer
 * _dst: destination buffer
 * l:    number of bytes to copy
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	const uint64_t *from = _src;
	uint64_t *to = _dst;

	if (unlikely(l >= 1024)) {
		memcpy(to, from, l);
		return;
	}
	while (likely(l > 0)) {
		to[0] = from[0];
		to[1] = from[1];
		to[2] = from[2];
		to[3] = from[3];
		to[4] = from[4];
		to[5] = from[5];
		to[6] = from[6];
		to[7] = from[7];
		to += 8;
		from += 8;
		l -= 64;
	}
}
279259412Sluigi
280259412Sluigi
281259412Sluigi/*
282259412Sluigi * locate a bridge among the existing ones.
283259412Sluigi * MUST BE CALLED WITH NMG_LOCK()
284259412Sluigi *
285259412Sluigi * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
286259412Sluigi * We assume that this is called with a name of at least NM_NAME chars.
287259412Sluigi */
288259412Sluigistatic struct nm_bridge *
289259412Sluiginm_find_bridge(const char *name, int create)
290259412Sluigi{
291259412Sluigi	int i, l, namelen;
292285349Sluigi	struct nm_bridge *b = NULL, *bridges;
293285349Sluigi	u_int num_bridges;
294259412Sluigi
295259412Sluigi	NMG_LOCK_ASSERT();
296259412Sluigi
297285349Sluigi	netmap_bns_getbridges(&bridges, &num_bridges);
298285349Sluigi
299259412Sluigi	namelen = strlen(NM_NAME);	/* base length */
300259412Sluigi	l = name ? strlen(name) : 0;		/* actual length */
301259412Sluigi	if (l < namelen) {
302259412Sluigi		D("invalid bridge name %s", name ? name : NULL);
303259412Sluigi		return NULL;
304259412Sluigi	}
305259412Sluigi	for (i = namelen + 1; i < l; i++) {
306259412Sluigi		if (name[i] == ':') {
307259412Sluigi			namelen = i;
308259412Sluigi			break;
309259412Sluigi		}
310259412Sluigi	}
311259412Sluigi	if (namelen >= IFNAMSIZ)
312259412Sluigi		namelen = IFNAMSIZ;
313259412Sluigi	ND("--- prefix is '%.*s' ---", namelen, name);
314259412Sluigi
315259412Sluigi	/* lookup the name, remember empty slot if there is one */
316285349Sluigi	for (i = 0; i < num_bridges; i++) {
317285349Sluigi		struct nm_bridge *x = bridges + i;
318259412Sluigi
319259412Sluigi		if (x->bdg_active_ports == 0) {
320259412Sluigi			if (create && b == NULL)
321259412Sluigi				b = x;	/* record empty slot */
322259412Sluigi		} else if (x->bdg_namelen != namelen) {
323259412Sluigi			continue;
324259412Sluigi		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
325259412Sluigi			ND("found '%.*s' at %d", namelen, name, i);
326259412Sluigi			b = x;
327259412Sluigi			break;
328259412Sluigi		}
329259412Sluigi	}
330285349Sluigi	if (i == num_bridges && b) { /* name not found, can create entry */
331259412Sluigi		/* initialize the bridge */
332259412Sluigi		strncpy(b->bdg_basename, name, namelen);
333259412Sluigi		ND("create new bridge %s with ports %d", b->bdg_basename,
334259412Sluigi			b->bdg_active_ports);
335259412Sluigi		b->bdg_namelen = namelen;
336259412Sluigi		b->bdg_active_ports = 0;
337259412Sluigi		for (i = 0; i < NM_BDG_MAXPORTS; i++)
338259412Sluigi			b->bdg_port_index[i] = i;
339259412Sluigi		/* set the default function */
340270063Sluigi		b->bdg_ops.lookup = netmap_bdg_learning;
341259412Sluigi		/* reset the MAC address table */
342259412Sluigi		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
343285349Sluigi		NM_BNS_GET(b);
344259412Sluigi	}
345259412Sluigi	return b;
346259412Sluigi}
347259412Sluigi
348259412Sluigi
349259412Sluigi/*
350259412Sluigi * Free the forwarding tables for rings attached to switch ports.
351259412Sluigi */
352259412Sluigistatic void
353259412Sluiginm_free_bdgfwd(struct netmap_adapter *na)
354259412Sluigi{
355259412Sluigi	int nrings, i;
356259412Sluigi	struct netmap_kring *kring;
357259412Sluigi
358259412Sluigi	NMG_LOCK_ASSERT();
359260368Sluigi	nrings = na->num_tx_rings;
360260368Sluigi	kring = na->tx_rings;
361259412Sluigi	for (i = 0; i < nrings; i++) {
362259412Sluigi		if (kring[i].nkr_ft) {
363259412Sluigi			free(kring[i].nkr_ft, M_DEVBUF);
364259412Sluigi			kring[i].nkr_ft = NULL; /* protect from freeing twice */
365259412Sluigi		}
366259412Sluigi	}
367259412Sluigi}
368259412Sluigi
369259412Sluigi
370259412Sluigi/*
371259412Sluigi * Allocate the forwarding tables for the rings attached to the bridge ports.
372259412Sluigi */
373259412Sluigistatic int
374259412Sluiginm_alloc_bdgfwd(struct netmap_adapter *na)
375259412Sluigi{
376259412Sluigi	int nrings, l, i, num_dstq;
377259412Sluigi	struct netmap_kring *kring;
378259412Sluigi
379259412Sluigi	NMG_LOCK_ASSERT();
380259412Sluigi	/* all port:rings + broadcast */
381259412Sluigi	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
382259412Sluigi	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
383259412Sluigi	l += sizeof(struct nm_bdg_q) * num_dstq;
384259412Sluigi	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
385259412Sluigi
386285349Sluigi	nrings = netmap_real_rings(na, NR_TX);
387259412Sluigi	kring = na->tx_rings;
388259412Sluigi	for (i = 0; i < nrings; i++) {
389259412Sluigi		struct nm_bdg_fwd *ft;
390259412Sluigi		struct nm_bdg_q *dstq;
391259412Sluigi		int j;
392259412Sluigi
393259412Sluigi		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
394259412Sluigi		if (!ft) {
395259412Sluigi			nm_free_bdgfwd(na);
396259412Sluigi			return ENOMEM;
397259412Sluigi		}
398259412Sluigi		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
399259412Sluigi		for (j = 0; j < num_dstq; j++) {
400259412Sluigi			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
401259412Sluigi			dstq[j].bq_len = 0;
402259412Sluigi		}
403259412Sluigi		kring[i].nkr_ft = ft;
404259412Sluigi	}
405259412Sluigi	return 0;
406259412Sluigi}
407259412Sluigi
408259412Sluigi
409270063Sluigi/* remove from bridge b the ports in slots hw and sw
410270063Sluigi * (sw can be -1 if not needed)
411270063Sluigi */
412259412Sluigistatic void
413259412Sluiginetmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
414259412Sluigi{
415259412Sluigi	int s_hw = hw, s_sw = sw;
416259412Sluigi	int i, lim =b->bdg_active_ports;
417259412Sluigi	uint8_t tmp[NM_BDG_MAXPORTS];
418259412Sluigi
419259412Sluigi	/*
420259412Sluigi	New algorithm:
421259412Sluigi	make a copy of bdg_port_index;
422259412Sluigi	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
423259412Sluigi	in the array of bdg_port_index, replacing them with
424259412Sluigi	entries from the bottom of the array;
425259412Sluigi	decrement bdg_active_ports;
426259412Sluigi	acquire BDG_WLOCK() and copy back the array.
427259412Sluigi	 */
428259412Sluigi
429261909Sluigi	if (netmap_verbose)
430261909Sluigi		D("detach %d and %d (lim %d)", hw, sw, lim);
431259412Sluigi	/* make a copy of the list of active ports, update it,
432259412Sluigi	 * and then copy back within BDG_WLOCK().
433259412Sluigi	 */
434259412Sluigi	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
435259412Sluigi	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
436259412Sluigi		if (hw >= 0 && tmp[i] == hw) {
437259412Sluigi			ND("detach hw %d at %d", hw, i);
438259412Sluigi			lim--; /* point to last active port */
439259412Sluigi			tmp[i] = tmp[lim]; /* swap with i */
440259412Sluigi			tmp[lim] = hw;	/* now this is inactive */
441259412Sluigi			hw = -1;
442259412Sluigi		} else if (sw >= 0 && tmp[i] == sw) {
443259412Sluigi			ND("detach sw %d at %d", sw, i);
444259412Sluigi			lim--;
445259412Sluigi			tmp[i] = tmp[lim];
446259412Sluigi			tmp[lim] = sw;
447259412Sluigi			sw = -1;
448259412Sluigi		} else {
449259412Sluigi			i++;
450259412Sluigi		}
451259412Sluigi	}
452259412Sluigi	if (hw >= 0 || sw >= 0) {
453259412Sluigi		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
454259412Sluigi	}
455259412Sluigi
456259412Sluigi	BDG_WLOCK(b);
457270063Sluigi	if (b->bdg_ops.dtor)
458270063Sluigi		b->bdg_ops.dtor(b->bdg_ports[s_hw]);
459259412Sluigi	b->bdg_ports[s_hw] = NULL;
460259412Sluigi	if (s_sw >= 0) {
461259412Sluigi		b->bdg_ports[s_sw] = NULL;
462259412Sluigi	}
463259412Sluigi	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
464259412Sluigi	b->bdg_active_ports = lim;
465259412Sluigi	BDG_WUNLOCK(b);
466259412Sluigi
467259412Sluigi	ND("now %d active ports", lim);
468259412Sluigi	if (lim == 0) {
469259412Sluigi		ND("marking bridge %s as free", b->bdg_basename);
470270063Sluigi		bzero(&b->bdg_ops, sizeof(b->bdg_ops));
471285349Sluigi		NM_BNS_PUT(b);
472259412Sluigi	}
473259412Sluigi}
474259412Sluigi
475270063Sluigi/* nm_bdg_ctl callback for VALE ports */
476270063Sluigistatic int
477270063Sluiginetmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
478270063Sluigi{
479270063Sluigi	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
480270063Sluigi	struct nm_bridge *b = vpna->na_bdg;
481260368Sluigi
482270063Sluigi	if (attach)
483270063Sluigi		return 0; /* nothing to do */
484270063Sluigi	if (b) {
485270063Sluigi		netmap_set_all_rings(na, 0 /* disable */);
486270063Sluigi		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
487270063Sluigi		vpna->na_bdg = NULL;
488270063Sluigi		netmap_set_all_rings(na, 1 /* enable */);
489270063Sluigi	}
490270063Sluigi	/* I have took reference just for attach */
491270063Sluigi	netmap_adapter_put(na);
492270063Sluigi	return 0;
493270063Sluigi}
494270063Sluigi
495270063Sluigi/* nm_dtor callback for ephemeral VALE ports */
496259412Sluigistatic void
497270063Sluiginetmap_vp_dtor(struct netmap_adapter *na)
498259412Sluigi{
499259412Sluigi	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
500259412Sluigi	struct nm_bridge *b = vpna->na_bdg;
501259412Sluigi
502270063Sluigi	ND("%s has %d references", na->name, na->na_refcount);
503259412Sluigi
504259412Sluigi	if (b) {
505259412Sluigi		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
506259412Sluigi	}
507270063Sluigi}
508259412Sluigi
509270063Sluigi/* remove a persistent VALE port from the system */
510270063Sluigistatic int
511270063Sluiginm_vi_destroy(const char *name)
512270063Sluigi{
513270063Sluigi	struct ifnet *ifp;
514270063Sluigi	int error;
515260368Sluigi
516270063Sluigi	ifp = ifunit_ref(name);
517270063Sluigi	if (!ifp)
518270063Sluigi		return ENXIO;
519270063Sluigi	NMG_LOCK();
520270063Sluigi	/* make sure this is actually a VALE port */
521270063Sluigi	if (!NETMAP_CAPABLE(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
522270063Sluigi		error = EINVAL;
523270063Sluigi		goto err;
524270063Sluigi	}
525270063Sluigi
526270063Sluigi	if (NA(ifp)->na_refcount > 1) {
527270063Sluigi		error = EBUSY;
528270063Sluigi		goto err;
529270063Sluigi	}
530270063Sluigi	NMG_UNLOCK();
531270063Sluigi
532270063Sluigi	D("destroying a persistent vale interface %s", ifp->if_xname);
533270063Sluigi	/* Linux requires all the references are released
534270063Sluigi	 * before unregister
535270063Sluigi	 */
536270063Sluigi	if_rele(ifp);
537270063Sluigi	netmap_detach(ifp);
538285698Sluigi	nm_vi_detach(ifp);
539270063Sluigi	return 0;
540270063Sluigi
541270063Sluigierr:
542270063Sluigi	NMG_UNLOCK();
543270063Sluigi	if_rele(ifp);
544270063Sluigi	return error;
545270063Sluigi}
546270063Sluigi
547270063Sluigi/*
548270063Sluigi * Create a virtual interface registered to the system.
549270063Sluigi * The interface will be attached to a bridge later.
550270063Sluigi */
551270063Sluigistatic int
552270063Sluiginm_vi_create(struct nmreq *nmr)
553270063Sluigi{
554270063Sluigi	struct ifnet *ifp;
555270063Sluigi	struct netmap_vp_adapter *vpna;
556270063Sluigi	int error;
557270063Sluigi
558270063Sluigi	/* don't include VALE prefix */
559270063Sluigi	if (!strncmp(nmr->nr_name, NM_NAME, strlen(NM_NAME)))
560270063Sluigi		return EINVAL;
561270063Sluigi	ifp = ifunit_ref(nmr->nr_name);
562270063Sluigi	if (ifp) { /* already exist, cannot create new one */
563270063Sluigi		if_rele(ifp);
564270063Sluigi		return EEXIST;
565270063Sluigi	}
566270063Sluigi	error = nm_vi_persist(nmr->nr_name, &ifp);
567270063Sluigi	if (error)
568270063Sluigi		return error;
569270063Sluigi
570270063Sluigi	NMG_LOCK();
571270063Sluigi	/* netmap_vp_create creates a struct netmap_vp_adapter */
572270063Sluigi	error = netmap_vp_create(nmr, ifp, &vpna);
573270063Sluigi	if (error) {
574270063Sluigi		D("error %d", error);
575270063Sluigi		nm_vi_detach(ifp);
576270063Sluigi		return error;
577270063Sluigi	}
578270063Sluigi	/* persist-specific routines */
579270063Sluigi	vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
580270063Sluigi	netmap_adapter_get(&vpna->up);
581270063Sluigi	NMG_UNLOCK();
582270063Sluigi	D("created %s", ifp->if_xname);
583270063Sluigi	return 0;
584270063Sluigi}
585270063Sluigi
586260368Sluigi/* Try to get a reference to a netmap adapter attached to a VALE switch.
587260368Sluigi * If the adapter is found (or is created), this function returns 0, a
588260368Sluigi * non NULL pointer is returned into *na, and the caller holds a
589260368Sluigi * reference to the adapter.
590260368Sluigi * If an adapter is not found, then no reference is grabbed and the
591260368Sluigi * function returns an error code, or 0 if there is just a VALE prefix
592260368Sluigi * mismatch. Therefore the caller holds a reference when
593260368Sluigi * (*na != NULL && return == 0).
594260368Sluigi */
595259412Sluigiint
596259412Sluiginetmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
597259412Sluigi{
598270063Sluigi	char *nr_name = nmr->nr_name;
599270063Sluigi	const char *ifname;
600259412Sluigi	struct ifnet *ifp;
601259412Sluigi	int error = 0;
602270063Sluigi	struct netmap_vp_adapter *vpna, *hostna = NULL;
603259412Sluigi	struct nm_bridge *b;
604259412Sluigi	int i, j, cand = -1, cand2 = -1;
605259412Sluigi	int needed;
606259412Sluigi
607259412Sluigi	*na = NULL;     /* default return value */
608259412Sluigi
609259412Sluigi	/* first try to see if this is a bridge port. */
610259412Sluigi	NMG_LOCK_ASSERT();
611270063Sluigi	if (strncmp(nr_name, NM_NAME, sizeof(NM_NAME) - 1)) {
612259412Sluigi		return 0;  /* no error, but no VALE prefix */
613259412Sluigi	}
614259412Sluigi
615270063Sluigi	b = nm_find_bridge(nr_name, create);
616259412Sluigi	if (b == NULL) {
617270063Sluigi		D("no bridges available for '%s'", nr_name);
618260700Sluigi		return (create ? ENOMEM : ENXIO);
619259412Sluigi	}
620270063Sluigi	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
621270063Sluigi		panic("x");
622259412Sluigi
623259412Sluigi	/* Now we are sure that name starts with the bridge's name,
624259412Sluigi	 * lookup the port in the bridge. We need to scan the entire
625259412Sluigi	 * list. It is not important to hold a WLOCK on the bridge
626259412Sluigi	 * during the search because NMG_LOCK already guarantees
627259412Sluigi	 * that there are no other possible writers.
628259412Sluigi	 */
629259412Sluigi
630259412Sluigi	/* lookup in the local list of ports */
631259412Sluigi	for (j = 0; j < b->bdg_active_ports; j++) {
632259412Sluigi		i = b->bdg_port_index[j];
633259412Sluigi		vpna = b->bdg_ports[i];
634259412Sluigi		// KASSERT(na != NULL);
635285349Sluigi		ND("checking %s", vpna->up.name);
636270063Sluigi		if (!strcmp(vpna->up.name, nr_name)) {
637259412Sluigi			netmap_adapter_get(&vpna->up);
638270063Sluigi			ND("found existing if %s refs %d", nr_name)
639270063Sluigi			*na = &vpna->up;
640259412Sluigi			return 0;
641259412Sluigi		}
642259412Sluigi	}
643259412Sluigi	/* not found, should we create it? */
644259412Sluigi	if (!create)
645259412Sluigi		return ENXIO;
646259412Sluigi	/* yes we should, see if we have space to attach entries */
647259412Sluigi	needed = 2; /* in some cases we only need 1 */
648259412Sluigi	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
649259412Sluigi		D("bridge full %d, cannot create new port", b->bdg_active_ports);
650260700Sluigi		return ENOMEM;
651259412Sluigi	}
652259412Sluigi	/* record the next two ports available, but do not allocate yet */
653259412Sluigi	cand = b->bdg_port_index[b->bdg_active_ports];
654259412Sluigi	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
655259412Sluigi	ND("+++ bridge %s port %s used %d avail %d %d",
656270063Sluigi		b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
657259412Sluigi
658259412Sluigi	/*
659259412Sluigi	 * try see if there is a matching NIC with this name
660259412Sluigi	 * (after the bridge's name)
661259412Sluigi	 */
662270063Sluigi	ifname = nr_name + b->bdg_namelen + 1;
663270063Sluigi	ifp = ifunit_ref(ifname);
664270063Sluigi	if (!ifp) {
665270063Sluigi		/* Create an ephemeral virtual port
666270063Sluigi		 * This block contains all the ephemeral-specific logics
667270063Sluigi		 */
668259412Sluigi		if (nmr->nr_cmd) {
669259412Sluigi			/* nr_cmd must be 0 for a virtual port */
670259412Sluigi			return EINVAL;
671259412Sluigi		}
672259412Sluigi
673259412Sluigi		/* bdg_netmap_attach creates a struct netmap_adapter */
674270063Sluigi		error = netmap_vp_create(nmr, NULL, &vpna);
675259412Sluigi		if (error) {
676259412Sluigi			D("error %d", error);
677259412Sluigi			free(ifp, M_DEVBUF);
678259412Sluigi			return error;
679259412Sluigi		}
680270063Sluigi		/* shortcut - we can skip get_hw_na(),
681270063Sluigi		 * ownership check and nm_bdg_attach()
682270063Sluigi		 */
683270063Sluigi	} else {
684270063Sluigi		struct netmap_adapter *hw;
685259412Sluigi
686270063Sluigi		error = netmap_get_hw_na(ifp, &hw);
687270063Sluigi		if (error || hw == NULL)
688259412Sluigi			goto out;
689259412Sluigi
690270063Sluigi		/* host adapter might not be created */
691270063Sluigi		error = hw->nm_bdg_attach(nr_name, hw);
692270063Sluigi		if (error)
693259412Sluigi			goto out;
694270063Sluigi		vpna = hw->na_vp;
695270063Sluigi		hostna = hw->na_hostvp;
696270063Sluigi		if_rele(ifp);
697259412Sluigi		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
698270063Sluigi			hostna = NULL;
699259412Sluigi	}
700259412Sluigi
701259412Sluigi	BDG_WLOCK(b);
702259412Sluigi	vpna->bdg_port = cand;
703259412Sluigi	ND("NIC  %p to bridge port %d", vpna, cand);
704259412Sluigi	/* bind the port to the bridge (virtual ports are not active) */
705259412Sluigi	b->bdg_ports[cand] = vpna;
706259412Sluigi	vpna->na_bdg = b;
707259412Sluigi	b->bdg_active_ports++;
708270063Sluigi	if (hostna != NULL) {
709259412Sluigi		/* also bind the host stack to the bridge */
710259412Sluigi		b->bdg_ports[cand2] = hostna;
711259412Sluigi		hostna->bdg_port = cand2;
712259412Sluigi		hostna->na_bdg = b;
713259412Sluigi		b->bdg_active_ports++;
714259412Sluigi		ND("host %p to bridge port %d", hostna, cand2);
715259412Sluigi	}
716270063Sluigi	ND("if %s refs %d", ifname, vpna->up.na_refcount);
717259412Sluigi	BDG_WUNLOCK(b);
718270063Sluigi	*na = &vpna->up;
719270063Sluigi	netmap_adapter_get(*na);
720259412Sluigi	return 0;
721259412Sluigi
722259412Sluigiout:
723259412Sluigi	if_rele(ifp);
724259412Sluigi
725259412Sluigi	return error;
726259412Sluigi}
727259412Sluigi
728259412Sluigi
729270063Sluigi/* Process NETMAP_BDG_ATTACH */
730259412Sluigistatic int
731270063Sluiginm_bdg_ctl_attach(struct nmreq *nmr)
732259412Sluigi{
733259412Sluigi	struct netmap_adapter *na;
734259412Sluigi	int error;
735259412Sluigi
736259412Sluigi	NMG_LOCK();
737260700Sluigi
738260368Sluigi	error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
739270063Sluigi	if (error) /* no device */
740259412Sluigi		goto unlock_exit;
741260700Sluigi
742260368Sluigi	if (na == NULL) { /* VALE prefix missing */
743259412Sluigi		error = EINVAL;
744260368Sluigi		goto unlock_exit;
745259412Sluigi	}
746259412Sluigi
747270063Sluigi	if (NETMAP_OWNED_BY_ANY(na)) {
748259412Sluigi		error = EBUSY;
749259412Sluigi		goto unref_exit;
750259412Sluigi	}
751259412Sluigi
752270063Sluigi	if (na->nm_bdg_ctl) {
753270063Sluigi		/* nop for VALE ports. The bwrap needs to put the hwna
754270063Sluigi		 * in netmap mode (see netmap_bwrap_bdg_ctl)
755270063Sluigi		 */
756270063Sluigi		error = na->nm_bdg_ctl(na, nmr, 1);
757270063Sluigi		if (error)
758270063Sluigi			goto unref_exit;
759270063Sluigi		ND("registered %s to netmap-mode", na->name);
760259412Sluigi	}
761259412Sluigi	NMG_UNLOCK();
762259412Sluigi	return 0;
763259412Sluigi
764259412Sluigiunref_exit:
765259412Sluigi	netmap_adapter_put(na);
766259412Sluigiunlock_exit:
767259412Sluigi	NMG_UNLOCK();
768259412Sluigi	return error;
769259412Sluigi}
770259412Sluigi
771260368Sluigi
772270063Sluigi/* process NETMAP_BDG_DETACH */
773259412Sluigistatic int
774270063Sluiginm_bdg_ctl_detach(struct nmreq *nmr)
775259412Sluigi{
776259412Sluigi	struct netmap_adapter *na;
777259412Sluigi	int error;
778259412Sluigi
779259412Sluigi	NMG_LOCK();
780260368Sluigi	error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
781259412Sluigi	if (error) { /* no device, or another bridge or user owns the device */
782259412Sluigi		goto unlock_exit;
783259412Sluigi	}
784260700Sluigi
785260368Sluigi	if (na == NULL) { /* VALE prefix missing */
786259412Sluigi		error = EINVAL;
787260368Sluigi		goto unlock_exit;
788259412Sluigi	}
789260368Sluigi
790270063Sluigi	if (na->nm_bdg_ctl) {
791270063Sluigi		/* remove the port from bridge. The bwrap
792270063Sluigi		 * also needs to put the hwna in normal mode
793270063Sluigi		 */
794270063Sluigi		error = na->nm_bdg_ctl(na, nmr, 0);
795259412Sluigi	}
796259412Sluigi
797259412Sluigi	netmap_adapter_put(na);
798259412Sluigiunlock_exit:
799259412Sluigi	NMG_UNLOCK();
800259412Sluigi	return error;
801259412Sluigi
802259412Sluigi}
803259412Sluigi
804259412Sluigi
805270063Sluigi/* Called by either user's context (netmap_ioctl())
806270063Sluigi * or external kernel modules (e.g., Openvswitch).
807270063Sluigi * Operation is indicated in nmr->nr_cmd.
808270063Sluigi * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge
809270063Sluigi * requires bdg_ops argument; the other commands ignore this argument.
810270063Sluigi *
811259412Sluigi * Called without NMG_LOCK.
812259412Sluigi */
813259412Sluigiint
814270063Sluiginetmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
815259412Sluigi{
816285349Sluigi	struct nm_bridge *b, *bridges;
817259412Sluigi	struct netmap_adapter *na;
818259412Sluigi	struct netmap_vp_adapter *vpna;
819259412Sluigi	char *name = nmr->nr_name;
820259412Sluigi	int cmd = nmr->nr_cmd, namelen = strlen(name);
821259412Sluigi	int error = 0, i, j;
822285349Sluigi	u_int num_bridges;
823259412Sluigi
824285349Sluigi	netmap_bns_getbridges(&bridges, &num_bridges);
825285349Sluigi
826259412Sluigi	switch (cmd) {
827270063Sluigi	case NETMAP_BDG_NEWIF:
828270063Sluigi		error = nm_vi_create(nmr);
829270063Sluigi		break;
830270063Sluigi
831270063Sluigi	case NETMAP_BDG_DELIF:
832270063Sluigi		error = nm_vi_destroy(nmr->nr_name);
833270063Sluigi		break;
834270063Sluigi
835259412Sluigi	case NETMAP_BDG_ATTACH:
836270063Sluigi		error = nm_bdg_ctl_attach(nmr);
837259412Sluigi		break;
838259412Sluigi
839259412Sluigi	case NETMAP_BDG_DETACH:
840270063Sluigi		error = nm_bdg_ctl_detach(nmr);
841259412Sluigi		break;
842259412Sluigi
843259412Sluigi	case NETMAP_BDG_LIST:
844259412Sluigi		/* this is used to enumerate bridges and ports */
845259412Sluigi		if (namelen) { /* look up indexes of bridge and port */
846259412Sluigi			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
847259412Sluigi				error = EINVAL;
848259412Sluigi				break;
849259412Sluigi			}
850259412Sluigi			NMG_LOCK();
851259412Sluigi			b = nm_find_bridge(name, 0 /* don't create */);
852259412Sluigi			if (!b) {
853259412Sluigi				error = ENOENT;
854259412Sluigi				NMG_UNLOCK();
855259412Sluigi				break;
856259412Sluigi			}
857259412Sluigi
858259412Sluigi			error = ENOENT;
859259412Sluigi			for (j = 0; j < b->bdg_active_ports; j++) {
860259412Sluigi				i = b->bdg_port_index[j];
861259412Sluigi				vpna = b->bdg_ports[i];
862259412Sluigi				if (vpna == NULL) {
863259412Sluigi					D("---AAAAAAAAARGH-------");
864259412Sluigi					continue;
865259412Sluigi				}
866259412Sluigi				/* the former and the latter identify a
867259412Sluigi				 * virtual port and a NIC, respectively
868259412Sluigi				 */
869270063Sluigi				if (!strcmp(vpna->up.name, name)) {
870259412Sluigi					/* bridge index */
871285349Sluigi					nmr->nr_arg1 = b - bridges;
872259412Sluigi					nmr->nr_arg2 = i; /* port index */
873259412Sluigi					error = 0;
874259412Sluigi					break;
875259412Sluigi				}
876259412Sluigi			}
877259412Sluigi			NMG_UNLOCK();
878259412Sluigi		} else {
879259412Sluigi			/* return the first non-empty entry starting from
880259412Sluigi			 * bridge nr_arg1 and port nr_arg2.
881259412Sluigi			 *
882259412Sluigi			 * Users can detect the end of the same bridge by
883259412Sluigi			 * seeing the new and old value of nr_arg1, and can
884259412Sluigi			 * detect the end of all the bridge by error != 0
885259412Sluigi			 */
886259412Sluigi			i = nmr->nr_arg1;
887259412Sluigi			j = nmr->nr_arg2;
888259412Sluigi
889259412Sluigi			NMG_LOCK();
890259412Sluigi			for (error = ENOENT; i < NM_BRIDGES; i++) {
891285349Sluigi				b = bridges + i;
892259412Sluigi				if (j >= b->bdg_active_ports) {
893259412Sluigi					j = 0; /* following bridges scan from 0 */
894259412Sluigi					continue;
895259412Sluigi				}
896259412Sluigi				nmr->nr_arg1 = i;
897259412Sluigi				nmr->nr_arg2 = j;
898259412Sluigi				j = b->bdg_port_index[j];
899259412Sluigi				vpna = b->bdg_ports[j];
900270063Sluigi				strncpy(name, vpna->up.name, (size_t)IFNAMSIZ);
901259412Sluigi				error = 0;
902259412Sluigi				break;
903259412Sluigi			}
904259412Sluigi			NMG_UNLOCK();
905259412Sluigi		}
906259412Sluigi		break;
907259412Sluigi
908270063Sluigi	case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */
909270063Sluigi		/* register callbacks to the given bridge.
910259412Sluigi		 * nmr->nr_name may be just bridge's name (including ':'
911259412Sluigi		 * if it is not just NM_NAME).
912259412Sluigi		 */
913270063Sluigi		if (!bdg_ops) {
914259412Sluigi			error = EINVAL;
915259412Sluigi			break;
916259412Sluigi		}
917259412Sluigi		NMG_LOCK();
918259412Sluigi		b = nm_find_bridge(name, 0 /* don't create */);
919259412Sluigi		if (!b) {
920259412Sluigi			error = EINVAL;
921259412Sluigi		} else {
922270063Sluigi			b->bdg_ops = *bdg_ops;
923259412Sluigi		}
924259412Sluigi		NMG_UNLOCK();
925259412Sluigi		break;
926259412Sluigi
927261909Sluigi	case NETMAP_BDG_VNET_HDR:
928261909Sluigi		/* Valid lengths for the virtio-net header are 0 (no header),
929261909Sluigi		   10 and 12. */
930261909Sluigi		if (nmr->nr_arg1 != 0 &&
931261909Sluigi			nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) &&
932261909Sluigi				nmr->nr_arg1 != 12) {
933261909Sluigi			error = EINVAL;
934261909Sluigi			break;
935261909Sluigi		}
936259412Sluigi		NMG_LOCK();
937259412Sluigi		error = netmap_get_bdg_na(nmr, &na, 0);
938260368Sluigi		if (na && !error) {
939259412Sluigi			vpna = (struct netmap_vp_adapter *)na;
940261909Sluigi			vpna->virt_hdr_len = nmr->nr_arg1;
941261909Sluigi			if (vpna->virt_hdr_len)
942270063Sluigi				vpna->mfs = NETMAP_BUF_SIZE(na);
943261909Sluigi			D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna);
944260368Sluigi			netmap_adapter_put(na);
945259412Sluigi		}
946259412Sluigi		NMG_UNLOCK();
947259412Sluigi		break;
948259412Sluigi
949259412Sluigi	default:
950259412Sluigi		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
951259412Sluigi		error = EINVAL;
952259412Sluigi		break;
953259412Sluigi	}
954259412Sluigi	return error;
955259412Sluigi}
956259412Sluigi
957270063Sluigiint
958270063Sluiginetmap_bdg_config(struct nmreq *nmr)
959270063Sluigi{
960270063Sluigi	struct nm_bridge *b;
961270063Sluigi	int error = EINVAL;
962270063Sluigi
963270063Sluigi	NMG_LOCK();
964270063Sluigi	b = nm_find_bridge(nmr->nr_name, 0);
965270063Sluigi	if (!b) {
966270063Sluigi		NMG_UNLOCK();
967270063Sluigi		return error;
968270063Sluigi	}
969270063Sluigi	NMG_UNLOCK();
970270063Sluigi	/* Don't call config() with NMG_LOCK() held */
971270063Sluigi	BDG_RLOCK(b);
972270063Sluigi	if (b->bdg_ops.config != NULL)
973270063Sluigi		error = b->bdg_ops.config((struct nm_ifreq *)nmr);
974270063Sluigi	BDG_RUNLOCK(b);
975270063Sluigi	return error;
976270063Sluigi}
977270063Sluigi
978270063Sluigi
979270063Sluigi/* nm_krings_create callback for VALE ports.
980270063Sluigi * Calls the standard netmap_krings_create, then adds leases on rx
981270063Sluigi * rings and bdgfwd on tx rings.
982270063Sluigi */
983259412Sluigistatic int
984259412Sluiginetmap_vp_krings_create(struct netmap_adapter *na)
985259412Sluigi{
986261909Sluigi	u_int tailroom;
987259412Sluigi	int error, i;
988259412Sluigi	uint32_t *leases;
989285349Sluigi	u_int nrx = netmap_real_rings(na, NR_RX);
990259412Sluigi
991259412Sluigi	/*
992259412Sluigi	 * Leases are attached to RX rings on vale ports
993259412Sluigi	 */
994259412Sluigi	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
995259412Sluigi
996261909Sluigi	error = netmap_krings_create(na, tailroom);
997259412Sluigi	if (error)
998259412Sluigi		return error;
999259412Sluigi
1000259412Sluigi	leases = na->tailroom;
1001259412Sluigi
1002259412Sluigi	for (i = 0; i < nrx; i++) { /* Receive rings */
1003259412Sluigi		na->rx_rings[i].nkr_leases = leases;
1004259412Sluigi		leases += na->num_rx_desc;
1005259412Sluigi	}
1006259412Sluigi
1007259412Sluigi	error = nm_alloc_bdgfwd(na);
1008259412Sluigi	if (error) {
1009259412Sluigi		netmap_krings_delete(na);
1010259412Sluigi		return error;
1011259412Sluigi	}
1012259412Sluigi
1013259412Sluigi	return 0;
1014259412Sluigi}
1015259412Sluigi
1016260368Sluigi
/*
 * nm_krings_delete callback for VALE ports.
 *
 * Releases the forwarding work area with nm_free_bdgfwd() and then
 * deletes the krings with netmap_krings_delete(), i.e. the reverse of
 * the order in which netmap_vp_krings_create() sets them up.
 */
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);
	netmap_krings_delete(na);
}
1024259412Sluigi
1025259412Sluigi
1026259412Sluigistatic int
1027259412Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
1028259412Sluigi	struct netmap_vp_adapter *na, u_int ring_nr);
1029259412Sluigi
1030259412Sluigi
1031259412Sluigi/*
1032270063Sluigi * main dispatch routine for the bridge.
1033259412Sluigi * Grab packets from a kring, move them into the ft structure
1034259412Sluigi * associated to the tx (input) port. Max one instance per port,
1035259412Sluigi * filtered on input (ioctl, poll or XXX).
1036259412Sluigi * Returns the next position in the ring.
1037259412Sluigi */
1038259412Sluigistatic int
1039270063Sluiginm_bdg_preflush(struct netmap_kring *kring, u_int end)
1040259412Sluigi{
1041270063Sluigi	struct netmap_vp_adapter *na =
1042270063Sluigi		(struct netmap_vp_adapter*)kring->na;
1043259412Sluigi	struct netmap_ring *ring = kring->ring;
1044259412Sluigi	struct nm_bdg_fwd *ft;
1045270063Sluigi	u_int ring_nr = kring->ring_id;
1046259412Sluigi	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
1047259412Sluigi	u_int ft_i = 0;	/* start from 0 */
1048259412Sluigi	u_int frags = 1; /* how many frags ? */
1049259412Sluigi	struct nm_bridge *b = na->na_bdg;
1050259412Sluigi
1051259412Sluigi	/* To protect against modifications to the bridge we acquire a
1052259412Sluigi	 * shared lock, waiting if we can sleep (if the source port is
1053259412Sluigi	 * attached to a user process) or with a trylock otherwise (NICs).
1054259412Sluigi	 */
1055259412Sluigi	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1056259412Sluigi	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
1057259412Sluigi		BDG_RLOCK(b);
1058259412Sluigi	else if (!BDG_RTRYLOCK(b))
1059259412Sluigi		return 0;
1060259412Sluigi	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1061259412Sluigi	ft = kring->nkr_ft;
1062259412Sluigi
1063259412Sluigi	for (; likely(j != end); j = nm_next(j, lim)) {
1064259412Sluigi		struct netmap_slot *slot = &ring->slot[j];
1065259412Sluigi		char *buf;
1066259412Sluigi
1067259412Sluigi		ft[ft_i].ft_len = slot->len;
1068259412Sluigi		ft[ft_i].ft_flags = slot->flags;
1069259412Sluigi
1070259412Sluigi		ND("flags is 0x%x", slot->flags);
1071285349Sluigi		/* we do not use the buf changed flag, but we still need to reset it */
1072285349Sluigi		slot->flags &= ~NS_BUF_CHANGED;
1073285349Sluigi
1074259412Sluigi		/* this slot goes into a list so initialize the link field */
1075259412Sluigi		ft[ft_i].ft_next = NM_FT_NULL;
1076259412Sluigi		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
1077270063Sluigi			(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
1078267151Sluigi		if (unlikely(buf == NULL)) {
1079267151Sluigi			RD(5, "NULL %s buffer pointer from %s slot %d len %d",
1080267151Sluigi				(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
1081267151Sluigi				kring->name, j, ft[ft_i].ft_len);
1082270063Sluigi			buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
1083267151Sluigi			ft[ft_i].ft_len = 0;
1084267151Sluigi			ft[ft_i].ft_flags = 0;
1085267151Sluigi		}
1086259487Sluigi		__builtin_prefetch(buf);
1087259412Sluigi		++ft_i;
1088259412Sluigi		if (slot->flags & NS_MOREFRAG) {
1089259412Sluigi			frags++;
1090259412Sluigi			continue;
1091259412Sluigi		}
1092259412Sluigi		if (unlikely(netmap_verbose && frags > 1))
1093259412Sluigi			RD(5, "%d frags at %d", frags, ft_i - frags);
1094259412Sluigi		ft[ft_i - frags].ft_frags = frags;
1095259412Sluigi		frags = 1;
1096259412Sluigi		if (unlikely((int)ft_i >= bridge_batch))
1097259412Sluigi			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1098259412Sluigi	}
1099259412Sluigi	if (frags > 1) {
1100259412Sluigi		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1101259412Sluigi		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
1102259412Sluigi		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
1103259412Sluigi		ft[ft_i - frags].ft_frags = frags - 1;
1104259412Sluigi	}
1105259412Sluigi	if (ft_i)
1106259412Sluigi		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1107259412Sluigi	BDG_RUNLOCK(b);
1108259412Sluigi	return j;
1109259412Sluigi}
1110259412Sluigi
1111259412Sluigi
1112259412Sluigi/* ----- FreeBSD if_bridge hash function ------- */
1113259412Sluigi
1114259412Sluigi/*
1115259412Sluigi * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1116259412Sluigi * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1117259412Sluigi *
1118259412Sluigi * http://www.burtleburtle.net/bob/hash/spooky.html
1119259412Sluigi */
1120259412Sluigi#define mix(a, b, c)                                                    \
1121259412Sluigido {                                                                    \
1122259412Sluigi        a -= b; a -= c; a ^= (c >> 13);                                 \
1123259412Sluigi        b -= c; b -= a; b ^= (a << 8);                                  \
1124259412Sluigi        c -= a; c -= b; c ^= (b >> 13);                                 \
1125259412Sluigi        a -= b; a -= c; a ^= (c >> 12);                                 \
1126259412Sluigi        b -= c; b -= a; b ^= (a << 16);                                 \
1127259412Sluigi        c -= a; c -= b; c ^= (b >> 5);                                  \
1128259412Sluigi        a -= b; a -= c; a ^= (c >> 3);                                  \
1129259412Sluigi        b -= c; b -= a; b ^= (a << 10);                                 \
1130259412Sluigi        c -= a; c -= b; c ^= (b >> 15);                                 \
1131259412Sluigi} while (/*CONSTCOND*/0)
1132259412Sluigi
1133260368Sluigi
1134259412Sluigistatic __inline uint32_t
1135259412Sluiginm_bridge_rthash(const uint8_t *addr)
1136259412Sluigi{
1137259412Sluigi        uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
1138259412Sluigi
1139259412Sluigi        b += addr[5] << 8;
1140259412Sluigi        b += addr[4];
1141259412Sluigi        a += addr[3] << 24;
1142259412Sluigi        a += addr[2] << 16;
1143259412Sluigi        a += addr[1] << 8;
1144259412Sluigi        a += addr[0];
1145259412Sluigi
1146259412Sluigi        mix(a, b, c);
1147259412Sluigi#define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
1148259412Sluigi        return (c & BRIDGE_RTHASH_MASK);
1149259412Sluigi}
1150259412Sluigi
1151259412Sluigi#undef mix
1152259412Sluigi
1153259412Sluigi
1154270063Sluigi/* nm_register callback for VALE ports */
1155259412Sluigistatic int
1156270063Sluiginetmap_vp_reg(struct netmap_adapter *na, int onoff)
1157259412Sluigi{
1158259412Sluigi	struct netmap_vp_adapter *vpna =
1159259412Sluigi		(struct netmap_vp_adapter*)na;
1160259412Sluigi
1161270063Sluigi	/* persistent ports may be put in netmap mode
1162270063Sluigi	 * before being attached to a bridge
1163259412Sluigi	 */
1164270063Sluigi	if (vpna->na_bdg)
1165270063Sluigi		BDG_WLOCK(vpna->na_bdg);
1166259412Sluigi	if (onoff) {
1167270063Sluigi		na->na_flags |= NAF_NETMAP_ON;
1168270063Sluigi		 /* XXX on FreeBSD, persistent VALE ports should also
1169270063Sluigi		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
1170270063Sluigi		 */
1171259412Sluigi	} else {
1172270063Sluigi		na->na_flags &= ~NAF_NETMAP_ON;
1173259412Sluigi	}
1174270063Sluigi	if (vpna->na_bdg)
1175270063Sluigi		BDG_WUNLOCK(vpna->na_bdg);
1176259412Sluigi	return 0;
1177259412Sluigi}
1178259412Sluigi
1179259412Sluigi
1180259412Sluigi/*
1181259412Sluigi * Lookup function for a learning bridge.
1182259412Sluigi * Update the hash table with the source address,
1183259412Sluigi * and then returns the destination port index, and the
1184259412Sluigi * ring in *dst_ring (at the moment, always use ring 0)
1185259412Sluigi */
1186259412Sluigiu_int
1187270063Sluiginetmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
1188285349Sluigi		struct netmap_vp_adapter *na)
1189259412Sluigi{
1190270063Sluigi	uint8_t *buf = ft->ft_buf;
1191270063Sluigi	u_int buf_len = ft->ft_len;
1192259412Sluigi	struct nm_hash_ent *ht = na->na_bdg->ht;
1193259412Sluigi	uint32_t sh, dh;
1194259412Sluigi	u_int dst, mysrc = na->bdg_port;
1195259412Sluigi	uint64_t smac, dmac;
1196259412Sluigi
1197270063Sluigi	/* safety check, unfortunately we have many cases */
1198270063Sluigi	if (buf_len >= 14 + na->virt_hdr_len) {
1199270063Sluigi		/* virthdr + mac_hdr in the same slot */
1200270063Sluigi		buf += na->virt_hdr_len;
1201270063Sluigi		buf_len -= na->virt_hdr_len;
1202270063Sluigi	} else if (buf_len == na->virt_hdr_len && ft->ft_flags & NS_MOREFRAG) {
1203270063Sluigi		/* only header in first fragment */
1204270063Sluigi		ft++;
1205270063Sluigi		buf = ft->ft_buf;
1206270063Sluigi		buf_len = ft->ft_len;
1207270063Sluigi	} else {
1208270063Sluigi		RD(5, "invalid buf format, length %d", buf_len);
1209259412Sluigi		return NM_BDG_NOPORT;
1210259412Sluigi	}
1211259412Sluigi	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1212259412Sluigi	smac = le64toh(*(uint64_t *)(buf + 4));
1213259412Sluigi	smac >>= 16;
1214259412Sluigi
1215259412Sluigi	/*
1216259412Sluigi	 * The hash is somewhat expensive, there might be some
1217259412Sluigi	 * worthwhile optimizations here.
1218259412Sluigi	 */
1219285349Sluigi	if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
1220259412Sluigi		uint8_t *s = buf+6;
1221259412Sluigi		sh = nm_bridge_rthash(s); // XXX hash of source
1222259412Sluigi		/* update source port forwarding entry */
1223285349Sluigi		na->last_smac = ht[sh].mac = smac;	/* XXX expire ? */
1224259412Sluigi		ht[sh].ports = mysrc;
1225259412Sluigi		if (netmap_verbose)
1226259412Sluigi		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1227259412Sluigi			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1228259412Sluigi	}
1229259412Sluigi	dst = NM_BDG_BROADCAST;
1230259412Sluigi	if ((buf[0] & 1) == 0) { /* unicast */
1231259412Sluigi		dh = nm_bridge_rthash(buf); // XXX hash of dst
1232259412Sluigi		if (ht[dh].mac == dmac) {	/* found dst */
1233259412Sluigi			dst = ht[dh].ports;
1234259412Sluigi		}
1235259412Sluigi		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1236259412Sluigi	}
1237259412Sluigi	return dst;
1238259412Sluigi}
1239259412Sluigi
1240259412Sluigi
1241259412Sluigi/*
1242260368Sluigi * Available space in the ring. Only used in VALE code
1243260368Sluigi * and only with is_rx = 1
1244260368Sluigi */
1245260368Sluigistatic inline uint32_t
1246260368Sluiginm_kr_space(struct netmap_kring *k, int is_rx)
1247260368Sluigi{
1248260368Sluigi	int space;
1249260368Sluigi
1250260368Sluigi	if (is_rx) {
1251260368Sluigi		int busy = k->nkr_hwlease - k->nr_hwcur;
1252260368Sluigi		if (busy < 0)
1253260368Sluigi			busy += k->nkr_num_slots;
1254260368Sluigi		space = k->nkr_num_slots - 1 - busy;
1255260368Sluigi	} else {
1256260368Sluigi		/* XXX never used in this branch */
1257260368Sluigi		space = k->nr_hwtail - k->nkr_hwlease;
1258260368Sluigi		if (space < 0)
1259260368Sluigi			space += k->nkr_num_slots;
1260260368Sluigi	}
1261260368Sluigi#if 0
1262260368Sluigi	// sanity check
1263260368Sluigi	if (k->nkr_hwlease >= k->nkr_num_slots ||
1264260368Sluigi		k->nr_hwcur >= k->nkr_num_slots ||
1265260368Sluigi		k->nr_tail >= k->nkr_num_slots ||
1266260368Sluigi		busy < 0 ||
1267260368Sluigi		busy >= k->nkr_num_slots) {
1268260368Sluigi		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
1269260368Sluigi			k->nkr_lease_idx, k->nkr_num_slots);
1270260368Sluigi	}
1271260368Sluigi#endif
1272260368Sluigi	return space;
1273260368Sluigi}
1274260368Sluigi
1275260368Sluigi
1276260368Sluigi
1277260368Sluigi
1278260368Sluigi/* make a lease on the kring for N positions. return the
1279260368Sluigi * lease index
1280260368Sluigi * XXX only used in VALE code and with is_rx = 1
1281260368Sluigi */
1282260368Sluigistatic inline uint32_t
1283260368Sluiginm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
1284260368Sluigi{
1285260368Sluigi	uint32_t lim = k->nkr_num_slots - 1;
1286260368Sluigi	uint32_t lease_idx = k->nkr_lease_idx;
1287260368Sluigi
1288260368Sluigi	k->nkr_leases[lease_idx] = NR_NOSLOT;
1289260368Sluigi	k->nkr_lease_idx = nm_next(lease_idx, lim);
1290260368Sluigi
1291260368Sluigi	if (n > nm_kr_space(k, is_rx)) {
1292260368Sluigi		D("invalid request for %d slots", n);
1293260368Sluigi		panic("x");
1294260368Sluigi	}
1295260368Sluigi	/* XXX verify that there are n slots */
1296260368Sluigi	k->nkr_hwlease += n;
1297260368Sluigi	if (k->nkr_hwlease > lim)
1298260368Sluigi		k->nkr_hwlease -= lim + 1;
1299260368Sluigi
1300260368Sluigi	if (k->nkr_hwlease >= k->nkr_num_slots ||
1301260368Sluigi		k->nr_hwcur >= k->nkr_num_slots ||
1302260368Sluigi		k->nr_hwtail >= k->nkr_num_slots ||
1303260368Sluigi		k->nkr_lease_idx >= k->nkr_num_slots) {
1304260368Sluigi		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
1305270063Sluigi			k->na->name,
1306260368Sluigi			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
1307260368Sluigi			k->nkr_lease_idx, k->nkr_num_slots);
1308260368Sluigi	}
1309260368Sluigi	return lease_idx;
1310260368Sluigi}
1311260368Sluigi
1312260368Sluigi/*
1313270063Sluigi *
1314259412Sluigi * This flush routine supports only unicast and broadcast but a large
1315259412Sluigi * number of ports, and lets us replace the learn and dispatch functions.
1316259412Sluigi */
1317259412Sluigiint
1318259412Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1319259412Sluigi		u_int ring_nr)
1320259412Sluigi{
1321259412Sluigi	struct nm_bdg_q *dst_ents, *brddst;
1322259412Sluigi	uint16_t num_dsts = 0, *dsts;
1323259412Sluigi	struct nm_bridge *b = na->na_bdg;
1324259412Sluigi	u_int i, j, me = na->bdg_port;
1325259412Sluigi
1326259412Sluigi	/*
1327259412Sluigi	 * The work area (pointed by ft) is followed by an array of
1328259412Sluigi	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1329259412Sluigi	 * queues per port plus one for the broadcast traffic.
1330259412Sluigi	 * Then we have an array of destination indexes.
1331259412Sluigi	 */
1332259412Sluigi	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1333259412Sluigi	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1334259412Sluigi
1335259412Sluigi	/* first pass: find a destination for each packet in the batch */
1336259412Sluigi	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1337259412Sluigi		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1338259412Sluigi		uint16_t dst_port, d_i;
1339259412Sluigi		struct nm_bdg_q *d;
1340259412Sluigi
1341259412Sluigi		ND("slot %d frags %d", i, ft[i].ft_frags);
1342261909Sluigi		/* Drop the packet if the virtio-net header is not into the first
1343259412Sluigi		   fragment nor at the very beginning of the second. */
1344270063Sluigi		if (unlikely(na->virt_hdr_len > ft[i].ft_len))
1345259412Sluigi			continue;
1346270063Sluigi		dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
1347259412Sluigi		if (netmap_verbose > 255)
1348259412Sluigi			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1349259412Sluigi		if (dst_port == NM_BDG_NOPORT)
1350259412Sluigi			continue; /* this packet is identified to be dropped */
1351259412Sluigi		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1352259412Sluigi			continue;
1353259412Sluigi		else if (dst_port == NM_BDG_BROADCAST)
1354259412Sluigi			dst_ring = 0; /* broadcasts always go to ring 0 */
1355259412Sluigi		else if (unlikely(dst_port == me ||
1356259412Sluigi		    !b->bdg_ports[dst_port]))
1357259412Sluigi			continue;
1358259412Sluigi
1359259412Sluigi		/* get a position in the scratch pad */
1360259412Sluigi		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1361259412Sluigi		d = dst_ents + d_i;
1362259412Sluigi
1363259412Sluigi		/* append the first fragment to the list */
1364259412Sluigi		if (d->bq_head == NM_FT_NULL) { /* new destination */
1365259412Sluigi			d->bq_head = d->bq_tail = i;
1366259412Sluigi			/* remember this position to be scanned later */
1367259412Sluigi			if (dst_port != NM_BDG_BROADCAST)
1368259412Sluigi				dsts[num_dsts++] = d_i;
1369259412Sluigi		} else {
1370259412Sluigi			ft[d->bq_tail].ft_next = i;
1371259412Sluigi			d->bq_tail = i;
1372259412Sluigi		}
1373259412Sluigi		d->bq_len += ft[i].ft_frags;
1374259412Sluigi	}
1375259412Sluigi
1376259412Sluigi	/*
1377259412Sluigi	 * Broadcast traffic goes to ring 0 on all destinations.
1378259412Sluigi	 * So we need to add these rings to the list of ports to scan.
1379259412Sluigi	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1380259412Sluigi	 * expensive. We should keep a compact list of active destinations
1381259412Sluigi	 * so we could shorten this loop.
1382259412Sluigi	 */
1383259412Sluigi	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1384259412Sluigi	if (brddst->bq_head != NM_FT_NULL) {
1385259412Sluigi		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1386259412Sluigi			uint16_t d_i;
1387259412Sluigi			i = b->bdg_port_index[j];
1388259412Sluigi			if (unlikely(i == me))
1389259412Sluigi				continue;
1390259412Sluigi			d_i = i * NM_BDG_MAXRINGS;
1391259412Sluigi			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1392259412Sluigi				dsts[num_dsts++] = d_i;
1393259412Sluigi		}
1394259412Sluigi	}
1395259412Sluigi
1396259412Sluigi	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1397270063Sluigi	/* second pass: scan destinations */
1398259412Sluigi	for (i = 0; i < num_dsts; i++) {
1399259412Sluigi		struct netmap_vp_adapter *dst_na;
1400259412Sluigi		struct netmap_kring *kring;
1401259412Sluigi		struct netmap_ring *ring;
1402261909Sluigi		u_int dst_nr, lim, j, d_i, next, brd_next;
1403259412Sluigi		u_int needed, howmany;
1404259412Sluigi		int retry = netmap_txsync_retry;
1405259412Sluigi		struct nm_bdg_q *d;
1406259412Sluigi		uint32_t my_start = 0, lease_idx = 0;
1407259412Sluigi		int nrings;
1408261909Sluigi		int virt_hdr_mismatch = 0;
1409259412Sluigi
1410259412Sluigi		d_i = dsts[i];
1411259412Sluigi		ND("second pass %d port %d", i, d_i);
1412259412Sluigi		d = dst_ents + d_i;
1413259412Sluigi		// XXX fix the division
1414259412Sluigi		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1415259412Sluigi		/* protect from the lookup function returning an inactive
1416259412Sluigi		 * destination port
1417259412Sluigi		 */
1418259412Sluigi		if (unlikely(dst_na == NULL))
1419259412Sluigi			goto cleanup;
1420259412Sluigi		if (dst_na->up.na_flags & NAF_SW_ONLY)
1421259412Sluigi			goto cleanup;
1422259412Sluigi		/*
1423259412Sluigi		 * The interface may be in !netmap mode in two cases:
1424259412Sluigi		 * - when na is attached but not activated yet;
1425259412Sluigi		 * - when na is being deactivated but is still attached.
1426259412Sluigi		 */
1427270063Sluigi		if (unlikely(!nm_netmap_on(&dst_na->up))) {
1428259412Sluigi			ND("not in netmap mode!");
1429259412Sluigi			goto cleanup;
1430259412Sluigi		}
1431259412Sluigi
1432259412Sluigi		/* there is at least one either unicast or broadcast packet */
1433259412Sluigi		brd_next = brddst->bq_head;
1434259412Sluigi		next = d->bq_head;
1435259412Sluigi		/* we need to reserve this many slots. If fewer are
1436259412Sluigi		 * available, some packets will be dropped.
1437259412Sluigi		 * Packets may have multiple fragments, so we may not use
1438259412Sluigi		 * there is a chance that we may not use all of the slots
1439259412Sluigi		 * we have claimed, so we will need to handle the leftover
1440259412Sluigi		 * ones when we regain the lock.
1441259412Sluigi		 */
1442259412Sluigi		needed = d->bq_len + brddst->bq_len;
1443259412Sluigi
1444261909Sluigi		if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
1445270063Sluigi			RD(3, "virt_hdr_mismatch, src %d dst %d", na->virt_hdr_len, dst_na->virt_hdr_len);
1446261909Sluigi			/* There is a virtio-net header/offloadings mismatch between
1447261909Sluigi			 * source and destination. The slower mismatch datapath will
1448261909Sluigi			 * be used to cope with all the mismatches.
1449261909Sluigi			 */
1450261909Sluigi			virt_hdr_mismatch = 1;
1451261909Sluigi			if (dst_na->mfs < na->mfs) {
1452261909Sluigi				/* We may need to do segmentation offloadings, and so
1453261909Sluigi				 * we may need a number of destination slots greater
1454261909Sluigi				 * than the number of input slots ('needed').
1455261909Sluigi				 * We look for the smallest integer 'x' which satisfies:
1456261909Sluigi				 *	needed * na->mfs + x * H <= x * na->mfs
1457261909Sluigi				 * where 'H' is the length of the longest header that may
1458261909Sluigi				 * be replicated in the segmentation process (e.g. for
1459261909Sluigi				 * TCPv4 we must account for ethernet header, IP header
1460261909Sluigi				 * and TCPv4 header).
1461261909Sluigi				 */
1462261909Sluigi				needed = (needed * na->mfs) /
1463261909Sluigi						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
1464261909Sluigi				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
1465261909Sluigi			}
1466261909Sluigi		}
1467261909Sluigi
1468259412Sluigi		ND(5, "pass 2 dst %d is %x %s",
1469259412Sluigi			i, d_i, is_vp ? "virtual" : "nic/host");
1470259412Sluigi		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1471259412Sluigi		nrings = dst_na->up.num_rx_rings;
1472259412Sluigi		if (dst_nr >= nrings)
1473259412Sluigi			dst_nr = dst_nr % nrings;
1474259412Sluigi		kring = &dst_na->up.rx_rings[dst_nr];
1475259412Sluigi		ring = kring->ring;
1476259412Sluigi		lim = kring->nkr_num_slots - 1;
1477259412Sluigi
1478259412Sluigiretry:
1479259412Sluigi
1480261909Sluigi		if (dst_na->retry && retry) {
1481261909Sluigi			/* try to get some free slot from the previous run */
1482285349Sluigi			kring->nm_notify(kring, 0);
1483270063Sluigi			/* actually useful only for bwraps, since there
1484270063Sluigi			 * the notify will trigger a txsync on the hwna. VALE ports
1485270063Sluigi			 * have dst_na->retry == 0
1486270063Sluigi			 */
1487261909Sluigi		}
1488259412Sluigi		/* reserve the buffers in the queue and an entry
1489259412Sluigi		 * to report completion, and drop lock.
1490259412Sluigi		 * XXX this might become a helper function.
1491259412Sluigi		 */
1492259412Sluigi		mtx_lock(&kring->q_lock);
1493259412Sluigi		if (kring->nkr_stopped) {
1494259412Sluigi			mtx_unlock(&kring->q_lock);
1495259412Sluigi			goto cleanup;
1496259412Sluigi		}
1497259412Sluigi		my_start = j = kring->nkr_hwlease;
1498259412Sluigi		howmany = nm_kr_space(kring, 1);
1499259412Sluigi		if (needed < howmany)
1500259412Sluigi			howmany = needed;
1501259412Sluigi		lease_idx = nm_kr_lease(kring, howmany, 1);
1502259412Sluigi		mtx_unlock(&kring->q_lock);
1503259412Sluigi
1504259412Sluigi		/* only retry if we need more than available slots */
1505259412Sluigi		if (retry && needed <= howmany)
1506259412Sluigi			retry = 0;
1507259412Sluigi
1508259412Sluigi		/* copy to the destination queue */
1509259412Sluigi		while (howmany > 0) {
1510259412Sluigi			struct netmap_slot *slot;
1511259412Sluigi			struct nm_bdg_fwd *ft_p, *ft_end;
1512259412Sluigi			u_int cnt;
1513259412Sluigi
1514259412Sluigi			/* find the queue from which we pick next packet.
1515259412Sluigi			 * NM_FT_NULL is always higher than valid indexes
1516259412Sluigi			 * so we never dereference it if the other list
1517259412Sluigi			 * has packets (and if both are empty we never
1518259412Sluigi			 * get here).
1519259412Sluigi			 */
1520259412Sluigi			if (next < brd_next) {
1521259412Sluigi				ft_p = ft + next;
1522259412Sluigi				next = ft_p->ft_next;
1523259412Sluigi			} else { /* insert broadcast */
1524259412Sluigi				ft_p = ft + brd_next;
1525259412Sluigi				brd_next = ft_p->ft_next;
1526259412Sluigi			}
1527259412Sluigi			cnt = ft_p->ft_frags; // cnt > 0
1528259412Sluigi			if (unlikely(cnt > howmany))
1529259412Sluigi			    break; /* no more space */
1530259412Sluigi			if (netmap_verbose && cnt > 1)
1531259412Sluigi				RD(5, "rx %d frags to %d", cnt, j);
1532259412Sluigi			ft_end = ft_p + cnt;
1533261909Sluigi			if (unlikely(virt_hdr_mismatch)) {
1534261909Sluigi				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
1535261909Sluigi			} else {
1536261909Sluigi				howmany -= cnt;
1537261909Sluigi				do {
1538261909Sluigi					char *dst, *src = ft_p->ft_buf;
1539261909Sluigi					size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1540259412Sluigi
1541261909Sluigi					slot = &ring->slot[j];
1542270063Sluigi					dst = NMB(&dst_na->up, slot);
1543259412Sluigi
1544261909Sluigi					ND("send [%d] %d(%d) bytes at %s:%d",
1545261909Sluigi							i, (int)copy_len, (int)dst_len,
1546261909Sluigi							NM_IFPNAME(dst_ifp), j);
1547261909Sluigi					/* round to a multiple of 64 */
1548261909Sluigi					copy_len = (copy_len + 63) & ~63;
1549260368Sluigi
1550270063Sluigi					if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
1551270063Sluigi						     copy_len > NETMAP_BUF_SIZE(&na->up))) {
1552267151Sluigi						RD(5, "invalid len %d, down to 64", (int)copy_len);
1553267151Sluigi						copy_len = dst_len = 64; // XXX
1554267151Sluigi					}
1555261909Sluigi					if (ft_p->ft_flags & NS_INDIRECT) {
1556261909Sluigi						if (copyin(src, dst, copy_len)) {
1557261909Sluigi							// invalid user pointer, pretend len is 0
1558261909Sluigi							dst_len = 0;
1559261909Sluigi						}
1560261909Sluigi					} else {
1561261909Sluigi						//memcpy(dst, src, copy_len);
1562261909Sluigi						pkt_copy(src, dst, (int)copy_len);
1563261909Sluigi					}
1564261909Sluigi					slot->len = dst_len;
1565261909Sluigi					slot->flags = (cnt << 8)| NS_MOREFRAG;
1566261909Sluigi					j = nm_next(j, lim);
1567261909Sluigi					needed--;
1568261909Sluigi					ft_p++;
1569261909Sluigi				} while (ft_p != ft_end);
1570261909Sluigi				slot->flags = (cnt << 8); /* clear flag on last entry */
1571261909Sluigi			}
1572259412Sluigi			/* are we done ? */
1573259412Sluigi			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1574259412Sluigi				break;
1575259412Sluigi		}
1576259412Sluigi		{
1577259412Sluigi		    /* current position */
1578259412Sluigi		    uint32_t *p = kring->nkr_leases; /* shorthand */
1579259412Sluigi		    uint32_t update_pos;
1580259412Sluigi		    int still_locked = 1;
1581259412Sluigi
1582259412Sluigi		    mtx_lock(&kring->q_lock);
1583259412Sluigi		    if (unlikely(howmany > 0)) {
1584259412Sluigi			/* not used all bufs. If i am the last one
1585259412Sluigi			 * i can recover the slots, otherwise must
1586259412Sluigi			 * fill them with 0 to mark empty packets.
1587259412Sluigi			 */
1588259412Sluigi			ND("leftover %d bufs", howmany);
1589259412Sluigi			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1590259412Sluigi			    /* yes i am the last one */
1591259412Sluigi			    ND("roll back nkr_hwlease to %d", j);
1592259412Sluigi			    kring->nkr_hwlease = j;
1593259412Sluigi			} else {
1594259412Sluigi			    while (howmany-- > 0) {
1595259412Sluigi				ring->slot[j].len = 0;
1596259412Sluigi				ring->slot[j].flags = 0;
1597259412Sluigi				j = nm_next(j, lim);
1598259412Sluigi			    }
1599259412Sluigi			}
1600259412Sluigi		    }
1601259412Sluigi		    p[lease_idx] = j; /* report I am done */
1602259412Sluigi
1603260368Sluigi		    update_pos = kring->nr_hwtail;
1604259412Sluigi
1605259412Sluigi		    if (my_start == update_pos) {
1606259412Sluigi			/* all slots before my_start have been reported,
1607259412Sluigi			 * so scan subsequent leases to see if other ranges
1608259412Sluigi			 * have been completed, and to a selwakeup or txsync.
1609259412Sluigi		         */
1610259412Sluigi			while (lease_idx != kring->nkr_lease_idx &&
1611259412Sluigi				p[lease_idx] != NR_NOSLOT) {
1612259412Sluigi			    j = p[lease_idx];
1613259412Sluigi			    p[lease_idx] = NR_NOSLOT;
1614259412Sluigi			    lease_idx = nm_next(lease_idx, lim);
1615259412Sluigi			}
1616259412Sluigi			/* j is the new 'write' position. j != my_start
1617259412Sluigi			 * means there are new buffers to report
1618259412Sluigi			 */
1619259412Sluigi			if (likely(j != my_start)) {
1620260368Sluigi				kring->nr_hwtail = j;
1621259412Sluigi				still_locked = 0;
1622259412Sluigi				mtx_unlock(&kring->q_lock);
1623285349Sluigi				kring->nm_notify(kring, 0);
1624270063Sluigi				/* this is netmap_notify for VALE ports and
1625270063Sluigi				 * netmap_bwrap_notify for bwrap. The latter will
1626270063Sluigi				 * trigger a txsync on the underlying hwna
1627270063Sluigi				 */
1628270063Sluigi				if (dst_na->retry && retry--) {
1629270063Sluigi					/* XXX this is going to call nm_notify again.
1630270063Sluigi					 * Only useful for bwrap in virtual machines
1631270063Sluigi					 */
1632259412Sluigi					goto retry;
1633270063Sluigi				}
1634259412Sluigi			}
1635259412Sluigi		    }
1636259412Sluigi		    if (still_locked)
1637259412Sluigi			mtx_unlock(&kring->q_lock);
1638259412Sluigi		}
1639259412Sluigicleanup:
1640259412Sluigi		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1641259412Sluigi		d->bq_len = 0;
1642259412Sluigi	}
1643259412Sluigi	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1644259412Sluigi	brddst->bq_len = 0;
1645259412Sluigi	return 0;
1646259412Sluigi}
1647259412Sluigi
1648270063Sluigi/* nm_txsync callback for VALE ports */
1649259412Sluigistatic int
1650270063Sluiginetmap_vp_txsync(struct netmap_kring *kring, int flags)
1651259412Sluigi{
1652270063Sluigi	struct netmap_vp_adapter *na =
1653270063Sluigi		(struct netmap_vp_adapter *)kring->na;
1654260368Sluigi	u_int done;
1655260368Sluigi	u_int const lim = kring->nkr_num_slots - 1;
1656285349Sluigi	u_int const head = kring->rhead;
1657259412Sluigi
1658259412Sluigi	if (bridge_batch <= 0) { /* testing only */
1659285349Sluigi		done = head; // used all
1660259412Sluigi		goto done;
1661259412Sluigi	}
1662270063Sluigi	if (!na->na_bdg) {
1663285349Sluigi		done = head;
1664270063Sluigi		goto done;
1665270063Sluigi	}
1666259412Sluigi	if (bridge_batch > NM_BDG_BATCH)
1667259412Sluigi		bridge_batch = NM_BDG_BATCH;
1668259412Sluigi
1669285349Sluigi	done = nm_bdg_preflush(kring, head);
1670259412Sluigidone:
1671285349Sluigi	if (done != head)
1672285349Sluigi		D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
1673260368Sluigi	/*
1674260368Sluigi	 * packets between 'done' and 'cur' are left unsent.
1675260368Sluigi	 */
1676260368Sluigi	kring->nr_hwcur = done;
1677260368Sluigi	kring->nr_hwtail = nm_prev(done, lim);
1678259412Sluigi	if (netmap_verbose)
1679270063Sluigi		D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
1680259412Sluigi	return 0;
1681259412Sluigi}
1682259412Sluigi
1683259412Sluigi
1684270063Sluigi/* rxsync code used by VALE ports nm_rxsync callback and also
1685270063Sluigi * internally by the brwap
1686259412Sluigi */
1687259412Sluigistatic int
1688270063Sluiginetmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
1689259412Sluigi{
1690270063Sluigi	struct netmap_adapter *na = kring->na;
1691259412Sluigi	struct netmap_ring *ring = kring->ring;
1692260368Sluigi	u_int nm_i, lim = kring->nkr_num_slots - 1;
1693285349Sluigi	u_int head = kring->rhead;
1694259412Sluigi	int n;
1695259412Sluigi
1696260368Sluigi	if (head > lim) {
1697259412Sluigi		D("ouch dangerous reset!!!");
1698259412Sluigi		n = netmap_ring_reinit(kring);
1699259412Sluigi		goto done;
1700259412Sluigi	}
1701259412Sluigi
1702260368Sluigi	/* First part, import newly received packets. */
1703260368Sluigi	/* actually nothing to do here, they are already in the kring */
1704259412Sluigi
1705260368Sluigi	/* Second part, skip past packets that userspace has released. */
1706260368Sluigi	nm_i = kring->nr_hwcur;
1707260368Sluigi	if (nm_i != head) {
1708260368Sluigi		/* consistency check, but nothing really important here */
1709260368Sluigi		for (n = 0; likely(nm_i != head); n++) {
1710260368Sluigi			struct netmap_slot *slot = &ring->slot[nm_i];
1711270063Sluigi			void *addr = NMB(na, slot);
1712259412Sluigi
1713270063Sluigi			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
1714259412Sluigi				D("bad buffer index %d, ignore ?",
1715259412Sluigi					slot->buf_idx);
1716259412Sluigi			}
1717259412Sluigi			slot->flags &= ~NS_BUF_CHANGED;
1718260368Sluigi			nm_i = nm_next(nm_i, lim);
1719259412Sluigi		}
1720260368Sluigi		kring->nr_hwcur = head;
1721259412Sluigi	}
1722260368Sluigi
1723259412Sluigi	n = 0;
1724259412Sluigidone:
1725260368Sluigi	return n;
1726260368Sluigi}
1727260368Sluigi
1728260368Sluigi/*
1729270063Sluigi * nm_rxsync callback for VALE ports
1730260368Sluigi * user process reading from a VALE switch.
1731260368Sluigi * Already protected against concurrent calls from userspace,
1732260368Sluigi * but we must acquire the queue's lock to protect against
1733260368Sluigi * writers on the same queue.
1734260368Sluigi */
1735260368Sluigistatic int
1736270063Sluiginetmap_vp_rxsync(struct netmap_kring *kring, int flags)
1737260368Sluigi{
1738260368Sluigi	int n;
1739260368Sluigi
1740260368Sluigi	mtx_lock(&kring->q_lock);
1741270063Sluigi	n = netmap_vp_rxsync_locked(kring, flags);
1742259412Sluigi	mtx_unlock(&kring->q_lock);
1743259412Sluigi	return n;
1744259412Sluigi}
1745259412Sluigi
1746260368Sluigi
1747270063Sluigi/* nm_bdg_attach callback for VALE ports
1748270063Sluigi * The na_vp port is this same netmap_adapter. There is no host port.
1749270063Sluigi */
1750259412Sluigistatic int
1751270063Sluiginetmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
1752259412Sluigi{
1753270063Sluigi	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
1754270063Sluigi
1755270063Sluigi	if (vpna->na_bdg)
1756270063Sluigi		return EBUSY;
1757270063Sluigi	na->na_vp = vpna;
1758270063Sluigi	strncpy(na->name, name, sizeof(na->name));
1759270063Sluigi	na->na_hostvp = NULL;
1760270063Sluigi	return 0;
1761270063Sluigi}
1762270063Sluigi
1763270063Sluigi/* create a netmap_vp_adapter that describes a VALE port.
1764270063Sluigi * Only persistent VALE ports have a non-null ifp.
1765270063Sluigi */
1766270063Sluigistatic int
1767270063Sluiginetmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter **ret)
1768270063Sluigi{
1769259412Sluigi	struct netmap_vp_adapter *vpna;
1770259412Sluigi	struct netmap_adapter *na;
1771259412Sluigi	int error;
1772261909Sluigi	u_int npipes = 0;
1773259412Sluigi
1774259412Sluigi	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1775259412Sluigi	if (vpna == NULL)
1776259412Sluigi		return ENOMEM;
1777259412Sluigi
1778259412Sluigi 	na = &vpna->up;
1779259412Sluigi
1780259412Sluigi	na->ifp = ifp;
1781270063Sluigi	strncpy(na->name, nmr->nr_name, sizeof(na->name));
1782259412Sluigi
1783259412Sluigi	/* bound checking */
1784259412Sluigi	na->num_tx_rings = nmr->nr_tx_rings;
1785259412Sluigi	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1786259412Sluigi	nmr->nr_tx_rings = na->num_tx_rings; // write back
1787259412Sluigi	na->num_rx_rings = nmr->nr_rx_rings;
1788259412Sluigi	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1789259412Sluigi	nmr->nr_rx_rings = na->num_rx_rings; // write back
1790259412Sluigi	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1791259412Sluigi			1, NM_BDG_MAXSLOTS, NULL);
1792259412Sluigi	na->num_tx_desc = nmr->nr_tx_slots;
1793259412Sluigi	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1794259412Sluigi			1, NM_BDG_MAXSLOTS, NULL);
1795261909Sluigi	/* validate number of pipes. We want at least 1,
1796261909Sluigi	 * but probably can do with some more.
1797261909Sluigi	 * So let's use 2 as default (when 0 is supplied)
1798261909Sluigi	 */
1799261909Sluigi	npipes = nmr->nr_arg1;
1800261909Sluigi	nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
1801261909Sluigi	nmr->nr_arg1 = npipes;	/* write back */
1802261909Sluigi	/* validate extra bufs */
1803261909Sluigi	nm_bound_var(&nmr->nr_arg3, 0, 0,
1804261909Sluigi			128*NM_BDG_MAXSLOTS, NULL);
1805259412Sluigi	na->num_rx_desc = nmr->nr_rx_slots;
1806261909Sluigi	vpna->virt_hdr_len = 0;
1807261909Sluigi	vpna->mfs = 1514;
1808285349Sluigi	vpna->last_smac = ~0llu;
1809261909Sluigi	/*if (vpna->mfs > netmap_buf_size)  TODO netmap_buf_size is zero??
1810261909Sluigi		vpna->mfs = netmap_buf_size; */
1811261909Sluigi        if (netmap_verbose)
1812261909Sluigi		D("max frame size %u", vpna->mfs);
1813259412Sluigi
1814285349Sluigi	na->na_flags |= NAF_BDG_MAYSLEEP;
1815285698Sluigi	/* persistent VALE ports look like hw devices
1816285698Sluigi	 * with a native netmap adapter
1817285698Sluigi	 */
1818285698Sluigi	if (ifp)
1819285698Sluigi		na->na_flags |= NAF_NATIVE;
1820270063Sluigi	na->nm_txsync = netmap_vp_txsync;
1821270063Sluigi	na->nm_rxsync = netmap_vp_rxsync;
1822270063Sluigi	na->nm_register = netmap_vp_reg;
1823259412Sluigi	na->nm_krings_create = netmap_vp_krings_create;
1824259412Sluigi	na->nm_krings_delete = netmap_vp_krings_delete;
1825270063Sluigi	na->nm_dtor = netmap_vp_dtor;
1826270063Sluigi	na->nm_mem = netmap_mem_private_new(na->name,
1827259412Sluigi			na->num_tx_rings, na->num_tx_desc,
1828261909Sluigi			na->num_rx_rings, na->num_rx_desc,
1829261909Sluigi			nmr->nr_arg3, npipes, &error);
1830261909Sluigi	if (na->nm_mem == NULL)
1831261909Sluigi		goto err;
1832270063Sluigi	na->nm_bdg_attach = netmap_vp_bdg_attach;
1833259412Sluigi	/* other nmd fields are set in the common routine */
1834259412Sluigi	error = netmap_attach_common(na);
1835261909Sluigi	if (error)
1836261909Sluigi		goto err;
1837270063Sluigi	*ret = vpna;
1838259412Sluigi	return 0;
1839261909Sluigi
1840261909Sluigierr:
1841261909Sluigi	if (na->nm_mem != NULL)
1842285349Sluigi		netmap_mem_delete(na->nm_mem);
1843261909Sluigi	free(vpna, M_DEVBUF);
1844261909Sluigi	return error;
1845259412Sluigi}
1846259412Sluigi
1847270063Sluigi/* Bridge wrapper code (bwrap).
1848270063Sluigi * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
1849270063Sluigi * VALE switch.
1850270063Sluigi * The main task is to swap the meaning of tx and rx rings to match the
1851270063Sluigi * expectations of the VALE switch code (see nm_bdg_flush).
1852270063Sluigi *
1853270063Sluigi * The bwrap works by interposing a netmap_bwrap_adapter between the
1854270063Sluigi * rest of the system and the hwna. The netmap_bwrap_adapter looks like
1855270063Sluigi * a netmap_vp_adapter to the rest of the system, but, internally, it
1856270063Sluigi * translates all callbacks to what the hwna expects.
1857270063Sluigi *
1858270063Sluigi * Note that we have to intercept callbacks coming from two sides:
1859270063Sluigi *
1860270063Sluigi *  - callbacks coming from the netmap module are intercepted by
1861270063Sluigi *    passing around the netmap_bwrap_adapter instead of the hwna
1862270063Sluigi *
1863270063Sluigi *  - callbacks coming from outside of the netmap module only know
1864270063Sluigi *    about the hwna. This, however, only happens in interrupt
1865270063Sluigi *    handlers, where only the hwna->nm_notify callback is called.
1866270063Sluigi *    What the bwrap does is to overwrite the hwna->nm_notify callback
1867270063Sluigi *    with its own netmap_bwrap_intr_notify.
1868270063Sluigi *    XXX This assumes that the hwna->nm_notify callback was the
1869270063Sluigi *    standard netmap_notify(), as it is the case for nic adapters.
1870270063Sluigi *    Any additional action performed by hwna->nm_notify will not be
1871270063Sluigi *    performed by netmap_bwrap_intr_notify.
1872270063Sluigi *
1873270063Sluigi * Additionally, the bwrap can optionally attach the host rings pair
1874270063Sluigi * of the wrapped adapter to a different port of the switch.
1875270063Sluigi */
1876260368Sluigi
1877270063Sluigi
1878259412Sluigistatic void
1879259412Sluiginetmap_bwrap_dtor(struct netmap_adapter *na)
1880259412Sluigi{
1881259412Sluigi	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1882259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
1883259412Sluigi
1884259412Sluigi	ND("na %p", na);
1885270063Sluigi	/* drop reference to hwna->ifp.
1886270063Sluigi	 * If we don't do this, netmap_detach_common(na)
1887270063Sluigi	 * will think it has set NA(na->ifp) to NULL
1888270063Sluigi	 */
1889270063Sluigi	na->ifp = NULL;
1890270063Sluigi	/* for safety, also drop the possible reference
1891270063Sluigi	 * in the hostna
1892270063Sluigi	 */
1893270063Sluigi	bna->host.up.ifp = NULL;
1894259412Sluigi
1895270063Sluigi	hwna->nm_mem = bna->save_nmd;
1896259412Sluigi	hwna->na_private = NULL;
1897270063Sluigi	hwna->na_vp = hwna->na_hostvp = NULL;
1898270063Sluigi	hwna->na_flags &= ~NAF_BUSY;
1899259412Sluigi	netmap_adapter_put(hwna);
1900259412Sluigi
1901259412Sluigi}
1902259412Sluigi
1903260368Sluigi
1904259412Sluigi/*
1905260368Sluigi * Intr callback for NICs connected to a bridge.
1906260368Sluigi * Simply ignore tx interrupts (maybe we could try to recover space ?)
1907260368Sluigi * and pass received packets from nic to the bridge.
1908260368Sluigi *
1909259412Sluigi * XXX TODO check locking: this is called from the interrupt
1910259412Sluigi * handler so we should make sure that the interface is not
1911259412Sluigi * disconnected while passing down an interrupt.
1912259412Sluigi *
1913260368Sluigi * Note, no user process can access this NIC or the host stack.
1914260368Sluigi * The only part of the ring that is significant are the slots,
1915260368Sluigi * and head/cur/tail are set from the kring as needed
1916260368Sluigi * (part as a receive ring, part as a transmit ring).
1917260368Sluigi *
1918260368Sluigi * callback that overwrites the hwna notify callback.
1919259412Sluigi * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1920259412Sluigi * The bridge wrapper then sends the packets through the bridge.
1921259412Sluigi */
1922259412Sluigistatic int
1923285349Sluiginetmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
1924259412Sluigi{
1925285349Sluigi	struct netmap_adapter *na = kring->na;
1926259412Sluigi	struct netmap_bwrap_adapter *bna = na->na_private;
1927285349Sluigi	struct netmap_kring *bkring;
1928259412Sluigi	struct netmap_vp_adapter *vpna = &bna->up;
1929285349Sluigi	u_int ring_nr = kring->ring_id;
1930259412Sluigi	int error = 0;
1931259412Sluigi
1932260368Sluigi	if (netmap_verbose)
1933285349Sluigi	    D("%s %s 0x%x", na->name, kring->name, flags);
1934259412Sluigi
1935270063Sluigi	if (!nm_netmap_on(na))
1936259412Sluigi		return 0;
1937259412Sluigi
1938285349Sluigi	bkring = &vpna->up.tx_rings[ring_nr];
1939259412Sluigi
1940259412Sluigi	/* make sure the ring is not disabled */
1941259412Sluigi	if (nm_kr_tryget(kring))
1942259412Sluigi		return 0;
1943259412Sluigi
1944260368Sluigi	if (netmap_verbose)
1945285349Sluigi	    D("%s head %d cur %d tail %d",  na->name,
1946260368Sluigi		kring->rhead, kring->rcur, kring->rtail);
1947260368Sluigi
1948285349Sluigi	/* simulate a user wakeup on the rx ring
1949285349Sluigi	 * fetch packets that have arrived.
1950261909Sluigi	 */
1951261909Sluigi	error = kring->nm_sync(kring, 0);
1952261909Sluigi	if (error)
1953261909Sluigi		goto put_out;
1954260368Sluigi	if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
1955259412Sluigi		D("how strange, interrupt with no packets on %s",
1956270063Sluigi			na->name);
1957259412Sluigi		goto put_out;
1958259412Sluigi	}
1959260368Sluigi
1960285349Sluigi	/* new packets are kring->rcur to kring->nr_hwtail, and the bkring
1961285349Sluigi	 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
1962260368Sluigi	 * to push all packets out.
1963260368Sluigi	 */
1964285349Sluigi	bkring->rhead = bkring->rcur = kring->nr_hwtail;
1965260368Sluigi
1966270063Sluigi	netmap_vp_txsync(bkring, flags);
1967259412Sluigi
1968260368Sluigi	/* mark all buffers as released on this ring */
1969285349Sluigi	kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
1970260368Sluigi	/* another call to actually release the buffers */
1971285349Sluigi	error = kring->nm_sync(kring, 0);
1972259412Sluigi
1973259412Sluigiput_out:
1974259412Sluigi	nm_kr_put(kring);
1975259412Sluigi	return error;
1976259412Sluigi}
1977259412Sluigi
1978260368Sluigi
1979270063Sluigi/* nm_register callback for bwrap */
1980259412Sluigistatic int
1981259412Sluiginetmap_bwrap_register(struct netmap_adapter *na, int onoff)
1982259412Sluigi{
1983259412Sluigi	struct netmap_bwrap_adapter *bna =
1984259412Sluigi		(struct netmap_bwrap_adapter *)na;
1985259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
1986259412Sluigi	struct netmap_vp_adapter *hostna = &bna->host;
1987259412Sluigi	int error;
1988285349Sluigi	enum txrx t;
1989259412Sluigi
1990270063Sluigi	ND("%s %s", na->name, onoff ? "on" : "off");
1991259412Sluigi
1992259412Sluigi	if (onoff) {
1993259412Sluigi		int i;
1994259412Sluigi
1995270063Sluigi		/* netmap_do_regif has been called on the bwrap na.
1996270063Sluigi		 * We need to pass the information about the
1997270063Sluigi		 * memory allocator down to the hwna before
1998270063Sluigi		 * putting it in netmap mode
1999270063Sluigi		 */
2000259412Sluigi		hwna->na_lut = na->na_lut;
2001259412Sluigi
2002259412Sluigi		if (hostna->na_bdg) {
2003270063Sluigi			/* if the host rings have been attached to switch,
2004270063Sluigi			 * we need to copy the memory allocator information
2005270063Sluigi			 * in the hostna also
2006270063Sluigi			 */
2007259412Sluigi			hostna->up.na_lut = na->na_lut;
2008259412Sluigi		}
2009259412Sluigi
2010260516Sluigi		/* cross-link the netmap rings
2011260516Sluigi		 * The original number of rings comes from hwna,
2012260516Sluigi		 * rx rings on one side equals tx rings on the other.
2013270063Sluigi		 * We need to do this now, after the initialization
2014270063Sluigi		 * of the kring->ring pointers
2015260516Sluigi		 */
2016285349Sluigi		for_rx_tx(t) {
2017285349Sluigi			enum txrx r= nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
2018285349Sluigi			for (i = 0; i < nma_get_nrings(na, r) + 1; i++) {
2019285349Sluigi				NMR(hwna, t)[i].nkr_num_slots = NMR(na, r)[i].nkr_num_slots;
2020285349Sluigi				NMR(hwna, t)[i].ring = NMR(na, r)[i].ring;
2021285349Sluigi			}
2022259412Sluigi		}
2023259412Sluigi	}
2024259412Sluigi
2025270063Sluigi	/* forward the request to the hwna */
2026270063Sluigi	error = hwna->nm_register(hwna, onoff);
2027270063Sluigi	if (error)
2028270063Sluigi		return error;
2029259412Sluigi
2030270063Sluigi	/* impersonate a netmap_vp_adapter */
2031270063Sluigi	netmap_vp_reg(na, onoff);
2032270063Sluigi	if (hostna->na_bdg)
2033270063Sluigi		netmap_vp_reg(&hostna->up, onoff);
2034259412Sluigi
2035259412Sluigi	if (onoff) {
2036285349Sluigi		u_int i;
2037285349Sluigi		/* intercept the hwna nm_nofify callback on the hw rings */
2038285349Sluigi		for (i = 0; i < hwna->num_rx_rings; i++) {
2039285349Sluigi			hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify;
2040285349Sluigi			hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
2041285349Sluigi		}
2042285349Sluigi		i = hwna->num_rx_rings; /* for safety */
2043285349Sluigi		/* save the host ring notify unconditionally */
2044285349Sluigi		hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify;
2045285349Sluigi		if (hostna->na_bdg) {
2046285349Sluigi			/* also intercept the host ring notify */
2047285349Sluigi			hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
2048285349Sluigi		}
2049259412Sluigi	} else {
2050285349Sluigi		u_int i;
2051285349Sluigi		/* reset all notify callbacks (including host ring) */
2052285349Sluigi		for (i = 0; i <= hwna->num_rx_rings; i++) {
2053285349Sluigi			hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify;
2054285349Sluigi			hwna->rx_rings[i].save_notify = NULL;
2055285349Sluigi		}
2056285349Sluigi		hwna->na_lut.lut = NULL;
2057285349Sluigi		hwna->na_lut.objtotal = 0;
2058285349Sluigi		hwna->na_lut.objsize = 0;
2059259412Sluigi	}
2060259412Sluigi
2061259412Sluigi	return 0;
2062259412Sluigi}
2063259412Sluigi
2064270063Sluigi/* nm_config callback for bwrap */
2065259412Sluigistatic int
2066259412Sluiginetmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
2067259412Sluigi				    u_int *rxr, u_int *rxd)
2068259412Sluigi{
2069259412Sluigi	struct netmap_bwrap_adapter *bna =
2070259412Sluigi		(struct netmap_bwrap_adapter *)na;
2071259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
2072259412Sluigi
2073259412Sluigi	/* forward the request */
2074259412Sluigi	netmap_update_config(hwna);
2075259412Sluigi	/* swap the results */
2076259412Sluigi	*txr = hwna->num_rx_rings;
2077259412Sluigi	*txd = hwna->num_rx_desc;
2078259412Sluigi	*rxr = hwna->num_tx_rings;
2079259412Sluigi	*rxd = hwna->num_rx_desc;
2080259412Sluigi
2081259412Sluigi	return 0;
2082259412Sluigi}
2083259412Sluigi
2084260368Sluigi
2085270063Sluigi/* nm_krings_create callback for bwrap */
2086259412Sluigistatic int
2087259412Sluiginetmap_bwrap_krings_create(struct netmap_adapter *na)
2088259412Sluigi{
2089259412Sluigi	struct netmap_bwrap_adapter *bna =
2090259412Sluigi		(struct netmap_bwrap_adapter *)na;
2091259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
2092259412Sluigi	struct netmap_adapter *hostna = &bna->host.up;
2093259412Sluigi	int error;
2094259412Sluigi
2095270063Sluigi	ND("%s", na->name);
2096259412Sluigi
2097270063Sluigi	/* impersonate a netmap_vp_adapter */
2098259412Sluigi	error = netmap_vp_krings_create(na);
2099259412Sluigi	if (error)
2100259412Sluigi		return error;
2101259412Sluigi
2102270063Sluigi	/* also create the hwna krings */
2103259412Sluigi	error = hwna->nm_krings_create(hwna);
2104259412Sluigi	if (error) {
2105259412Sluigi		netmap_vp_krings_delete(na);
2106259412Sluigi		return error;
2107259412Sluigi	}
2108270063Sluigi	/* the connection between the bwrap krings and the hwna krings
2109270063Sluigi	 * will be perfomed later, in the nm_register callback, since
2110270063Sluigi	 * now the kring->ring pointers have not been initialized yet
2111270063Sluigi	 */
2112259412Sluigi
2113261909Sluigi	if (na->na_flags & NAF_HOST_RINGS) {
2114270063Sluigi		/* the hostna rings are the host rings of the bwrap.
2115270063Sluigi		 * The corresponding krings must point back to the
2116270063Sluigi		 * hostna
2117270063Sluigi		 */
2118285349Sluigi		hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
2119270063Sluigi		hostna->tx_rings[0].na = hostna;
2120285349Sluigi		hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
2121270063Sluigi		hostna->rx_rings[0].na = hostna;
2122261909Sluigi	}
2123259412Sluigi
2124259412Sluigi	return 0;
2125259412Sluigi}
2126259412Sluigi
2127260368Sluigi
2128259412Sluigistatic void
2129259412Sluiginetmap_bwrap_krings_delete(struct netmap_adapter *na)
2130259412Sluigi{
2131259412Sluigi	struct netmap_bwrap_adapter *bna =
2132259412Sluigi		(struct netmap_bwrap_adapter *)na;
2133259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
2134259412Sluigi
2135270063Sluigi	ND("%s", na->name);
2136259412Sluigi
2137259412Sluigi	hwna->nm_krings_delete(hwna);
2138259412Sluigi	netmap_vp_krings_delete(na);
2139259412Sluigi}
2140259412Sluigi
2141260368Sluigi
2142259412Sluigi/* notify method for the bridge-->hwna direction */
2143259412Sluigistatic int
2144285349Sluiginetmap_bwrap_notify(struct netmap_kring *kring, int flags)
2145259412Sluigi{
2146285349Sluigi	struct netmap_adapter *na = kring->na;
2147285349Sluigi	struct netmap_bwrap_adapter *bna = na->na_private;
2148259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
2149285349Sluigi	u_int ring_n = kring->ring_id;
2150285349Sluigi	u_int lim = kring->nkr_num_slots - 1;
2151285349Sluigi	struct netmap_kring *hw_kring;
2152259412Sluigi	int error = 0;
2153259412Sluigi
2154285349Sluigi	ND("%s: na %s hwna %s",
2155285349Sluigi			(kring ? kring->name : "NULL!"),
2156285349Sluigi			(na ? na->name : "NULL!"),
2157285349Sluigi			(hwna ? hwna->name : "NULL!"));
2158259412Sluigi	hw_kring = &hwna->tx_rings[ring_n];
2159259412Sluigi
2160285349Sluigi	if (nm_kr_tryget(hw_kring))
2161285349Sluigi		return 0;
2162285349Sluigi
2163270063Sluigi	if (!nm_netmap_on(hwna))
2164259412Sluigi		return 0;
2165260368Sluigi	/* first step: simulate a user wakeup on the rx ring */
2166285349Sluigi	netmap_vp_rxsync(kring, flags);
2167260368Sluigi	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2168270063Sluigi		na->name, ring_n,
2169260368Sluigi		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
2170260368Sluigi		ring->head, ring->cur, ring->tail,
2171260368Sluigi		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
2172285349Sluigi	/* second step: the new packets are sent on the tx ring
2173260368Sluigi	 * (which is actually the same ring)
2174260368Sluigi	 */
2175285349Sluigi	hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
2176261909Sluigi	error = hw_kring->nm_sync(hw_kring, flags);
2177285349Sluigi	if (error)
2178285349Sluigi		goto out;
2179260368Sluigi
2180285349Sluigi	/* third step: now we are back the rx ring */
2181260368Sluigi	/* claim ownership on all hw owned bufs */
2182285349Sluigi	kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
2183260368Sluigi
2184285349Sluigi	/* fourth step: the user goes to sleep again, causing another rxsync */
2185285349Sluigi	netmap_vp_rxsync(kring, flags);
2186260368Sluigi	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2187270063Sluigi		na->name, ring_n,
2188260368Sluigi		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
2189260368Sluigi		ring->head, ring->cur, ring->tail,
2190260368Sluigi		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
2191285349Sluigiout:
2192285349Sluigi	nm_kr_put(hw_kring);
2193259412Sluigi	return error;
2194259412Sluigi}
2195259412Sluigi
2196260368Sluigi
2197270063Sluigi/* nm_bdg_ctl callback for the bwrap.
2198270063Sluigi * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
2199270063Sluigi * On attach, it needs to provide a fake netmap_priv_d structure and
2200270063Sluigi * perform a netmap_do_regif() on the bwrap. This will put both the
2201270063Sluigi * bwrap and the hwna in netmap mode, with the netmap rings shared
2202270063Sluigi * and cross linked. Moroever, it will start intercepting interrupts
2203270063Sluigi * directed to hwna.
2204270063Sluigi */
2205259412Sluigistatic int
2206270063Sluiginetmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
2207259412Sluigi{
2208270063Sluigi	struct netmap_priv_d *npriv;
2209270063Sluigi	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
2210270063Sluigi	int error = 0;
2211270063Sluigi
2212270063Sluigi	if (attach) {
2213270063Sluigi		if (NETMAP_OWNED_BY_ANY(na)) {
2214270063Sluigi			return EBUSY;
2215270063Sluigi		}
2216270063Sluigi		if (bna->na_kpriv) {
2217270063Sluigi			/* nothing to do */
2218270063Sluigi			return 0;
2219270063Sluigi		}
2220270063Sluigi		npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
2221270063Sluigi		if (npriv == NULL)
2222270063Sluigi			return ENOMEM;
2223285349Sluigi		error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags);
2224285349Sluigi		if (error) {
2225270063Sluigi			bzero(npriv, sizeof(*npriv));
2226270063Sluigi			free(npriv, M_DEVBUF);
2227270063Sluigi			return error;
2228270063Sluigi		}
2229270063Sluigi		bna->na_kpriv = npriv;
2230270063Sluigi		na->na_flags |= NAF_BUSY;
2231270063Sluigi	} else {
2232270063Sluigi		int last_instance;
2233270063Sluigi
2234270063Sluigi		if (na->active_fds == 0) /* not registered */
2235270063Sluigi			return EINVAL;
2236270063Sluigi		last_instance = netmap_dtor_locked(bna->na_kpriv);
2237270063Sluigi		if (!last_instance) {
2238270063Sluigi			D("--- error, trying to detach an entry with active mmaps");
2239270063Sluigi			error = EINVAL;
2240270063Sluigi		} else {
2241270063Sluigi			struct nm_bridge *b = bna->up.na_bdg,
2242270063Sluigi				*bh = bna->host.na_bdg;
2243270063Sluigi			npriv = bna->na_kpriv;
2244270063Sluigi			bna->na_kpriv = NULL;
2245270063Sluigi			D("deleting priv");
2246270063Sluigi
2247270063Sluigi			bzero(npriv, sizeof(*npriv));
2248270063Sluigi			free(npriv, M_DEVBUF);
2249270063Sluigi			if (b) {
2250270063Sluigi				/* XXX the bwrap dtor should take care
2251270063Sluigi				 * of this (2014-06-16)
2252270063Sluigi				 */
2253270063Sluigi				netmap_bdg_detach_common(b, bna->up.bdg_port,
2254270063Sluigi				    (bh ? bna->host.bdg_port : -1));
2255270063Sluigi			}
2256270063Sluigi			na->na_flags &= ~NAF_BUSY;
2257270063Sluigi		}
2258270063Sluigi	}
2259270063Sluigi	return error;
2260270063Sluigi
2261270063Sluigi}
2262270063Sluigi
2263270063Sluigi/* attach a bridge wrapper to the 'real' device */
2264270063Sluigiint
2265270063Sluiginetmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
2266270063Sluigi{
2267259412Sluigi	struct netmap_bwrap_adapter *bna;
2268270063Sluigi	struct netmap_adapter *na = NULL;
2269270063Sluigi	struct netmap_adapter *hostna = NULL;
2270270063Sluigi	int error = 0;
2271285349Sluigi	enum txrx t;
2272259412Sluigi
2273270063Sluigi	/* make sure the NIC is not already in use */
2274270063Sluigi	if (NETMAP_OWNED_BY_ANY(hwna)) {
2275270063Sluigi		D("NIC %s busy, cannot attach to bridge", hwna->name);
2276270063Sluigi		return EBUSY;
2277270063Sluigi	}
2278259412Sluigi
2279259412Sluigi	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
2280270063Sluigi	if (bna == NULL) {
2281259412Sluigi		return ENOMEM;
2282270063Sluigi	}
2283259412Sluigi
2284259412Sluigi	na = &bna->up.up;
2285285349Sluigi	na->na_private = bna;
2286270063Sluigi	strncpy(na->name, nr_name, sizeof(na->name));
2287259412Sluigi	/* fill the ring data for the bwrap adapter with rx/tx meanings
2288259412Sluigi	 * swapped. The real cross-linking will be done during register,
2289259412Sluigi	 * when all the krings will have been created.
2290259412Sluigi	 */
2291285349Sluigi	for_rx_tx(t) {
2292285349Sluigi		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
2293285349Sluigi		nma_set_nrings(na, t, nma_get_nrings(hwna, r));
2294285349Sluigi		nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
2295285349Sluigi	}
2296259412Sluigi	na->nm_dtor = netmap_bwrap_dtor;
2297259412Sluigi	na->nm_register = netmap_bwrap_register;
2298259412Sluigi	// na->nm_txsync = netmap_bwrap_txsync;
2299259412Sluigi	// na->nm_rxsync = netmap_bwrap_rxsync;
2300259412Sluigi	na->nm_config = netmap_bwrap_config;
2301259412Sluigi	na->nm_krings_create = netmap_bwrap_krings_create;
2302259412Sluigi	na->nm_krings_delete = netmap_bwrap_krings_delete;
2303259412Sluigi	na->nm_notify = netmap_bwrap_notify;
2304270063Sluigi	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
2305270063Sluigi	na->pdev = hwna->pdev;
2306270063Sluigi	na->nm_mem = netmap_mem_private_new(na->name,
2307270063Sluigi			na->num_tx_rings, na->num_tx_desc,
2308270063Sluigi			na->num_rx_rings, na->num_rx_desc,
2309270063Sluigi			0, 0, &error);
2310270063Sluigi	na->na_flags |= NAF_MEM_OWNER;
2311270063Sluigi	if (na->nm_mem == NULL)
2312270063Sluigi		goto err_put;
2313259412Sluigi	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
2314259412Sluigi
2315259412Sluigi	bna->hwna = hwna;
2316259412Sluigi	netmap_adapter_get(hwna);
2317259412Sluigi	hwna->na_private = bna; /* weak reference */
2318270063Sluigi	hwna->na_vp = &bna->up;
2319270063Sluigi
2320261909Sluigi	if (hwna->na_flags & NAF_HOST_RINGS) {
2321270063Sluigi		if (hwna->na_flags & NAF_SW_ONLY)
2322270063Sluigi			na->na_flags |= NAF_SW_ONLY;
2323261909Sluigi		na->na_flags |= NAF_HOST_RINGS;
2324261909Sluigi		hostna = &bna->host.up;
2325270063Sluigi		snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name);
2326261909Sluigi		hostna->ifp = hwna->ifp;
2327285349Sluigi		for_rx_tx(t) {
2328285349Sluigi			enum txrx r = nm_txrx_swap(t);
2329285349Sluigi			nma_set_nrings(hostna, t, 1);
2330285349Sluigi			nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
2331285349Sluigi		}
2332261909Sluigi		// hostna->nm_txsync = netmap_bwrap_host_txsync;
2333261909Sluigi		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
2334285349Sluigi		hostna->nm_notify = netmap_bwrap_notify;
2335261909Sluigi		hostna->nm_mem = na->nm_mem;
2336261909Sluigi		hostna->na_private = bna;
2337270063Sluigi		hostna->na_vp = &bna->up;
2338270063Sluigi		na->na_hostvp = hwna->na_hostvp =
2339270063Sluigi			hostna->na_hostvp = &bna->host;
2340270063Sluigi		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
2341261909Sluigi	}
2342259412Sluigi
2343260368Sluigi	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
2344270063Sluigi		na->name, ifp->if_xname,
2345259412Sluigi		na->num_tx_rings, na->num_tx_desc,
2346259412Sluigi		na->num_rx_rings, na->num_rx_desc);
2347259412Sluigi
2348259412Sluigi	error = netmap_attach_common(na);
2349259412Sluigi	if (error) {
2350270063Sluigi		goto err_free;
2351259412Sluigi	}
2352270063Sluigi	/* make bwrap ifp point to the real ifp
2353270063Sluigi	 * NOTE: netmap_attach_common() interprets a non-NULL na->ifp
2354270063Sluigi	 * as a request to make the ifp point to the na. Since we
2355270063Sluigi	 * do not want to change the na already pointed to by hwna->ifp,
2356270063Sluigi	 * the following assignment has to be delayed until now
2357270063Sluigi	 */
2358270063Sluigi	na->ifp = hwna->ifp;
2359270063Sluigi	hwna->na_flags |= NAF_BUSY;
2360270063Sluigi	/* make hwna point to the allocator we are actually using,
2361270063Sluigi	 * so that monitors will be able to find it
2362270063Sluigi	 */
2363270063Sluigi	bna->save_nmd = hwna->nm_mem;
2364270063Sluigi	hwna->nm_mem = na->nm_mem;
2365259412Sluigi	return 0;
2366270063Sluigi
2367270063Sluigierr_free:
2368285349Sluigi	netmap_mem_delete(na->nm_mem);
2369270063Sluigierr_put:
2370270063Sluigi	hwna->na_vp = hwna->na_hostvp = NULL;
2371270063Sluigi	netmap_adapter_put(hwna);
2372270063Sluigi	free(bna, M_DEVBUF);
2373270063Sluigi	return error;
2374270063Sluigi
2375259412Sluigi}
2376259412Sluigi
2377285349Sluigistruct nm_bridge *
2378285349Sluiginetmap_init_bridges2(u_int n)
2379285349Sluigi{
2380285349Sluigi	int i;
2381285349Sluigi	struct nm_bridge *b;
2382260368Sluigi
2383285349Sluigi	b = malloc(sizeof(struct nm_bridge) * n, M_DEVBUF,
2384285349Sluigi		M_NOWAIT | M_ZERO);
2385285349Sluigi	if (b == NULL)
2386285349Sluigi		return NULL;
2387285349Sluigi	for (i = 0; i < n; i++)
2388285349Sluigi		BDG_RWINIT(&b[i]);
2389285349Sluigi	return b;
2390285349Sluigi}
2391285349Sluigi
2392259412Sluigivoid
2393285349Sluiginetmap_uninit_bridges2(struct nm_bridge *b, u_int n)
2394259412Sluigi{
2395259412Sluigi	int i;
2396285349Sluigi
2397285349Sluigi	if (b == NULL)
2398285349Sluigi		return;
2399285349Sluigi
2400285349Sluigi	for (i = 0; i < n; i++)
2401285349Sluigi		BDG_RWDESTROY(&b[i]);
2402285349Sluigi	free(b, M_DEVBUF);
2403259412Sluigi}
2404285349Sluigi
2405285349Sluigiint
2406285349Sluiginetmap_init_bridges(void)
2407285349Sluigi{
2408285349Sluigi#ifdef CONFIG_NET_NS
2409285349Sluigi	return netmap_bns_register();
2410285349Sluigi#else
2411285349Sluigi	nm_bridges = netmap_init_bridges2(NM_BRIDGES);
2412285349Sluigi	if (nm_bridges == NULL)
2413285349Sluigi		return ENOMEM;
2414285349Sluigi	return 0;
2415285349Sluigi#endif
2416285349Sluigi}
2417285349Sluigi
2418285349Sluigivoid
2419285349Sluiginetmap_uninit_bridges(void)
2420285349Sluigi{
2421285349Sluigi#ifdef CONFIG_NET_NS
2422285349Sluigi	netmap_bns_unregister();
2423285349Sluigi#else
2424285349Sluigi	netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
2425285349Sluigi#endif
2426285349Sluigi}
2427259412Sluigi#endif /* WITH_VALE */
2428