netmap_vale.c revision 270063
1259412Sluigi/*
2260368Sluigi * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
3259412Sluigi *
4259412Sluigi * Redistribution and use in source and binary forms, with or without
5259412Sluigi * modification, are permitted provided that the following conditions
6259412Sluigi * are met:
7259412Sluigi *   1. Redistributions of source code must retain the above copyright
8259412Sluigi *      notice, this list of conditions and the following disclaimer.
9259412Sluigi *   2. Redistributions in binary form must reproduce the above copyright
10259412Sluigi *      notice, this list of conditions and the following disclaimer in the
11259412Sluigi *      documentation and/or other materials provided with the distribution.
12259412Sluigi *
13259412Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14259412Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15259412Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16259412Sluigi * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17259412Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18259412Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19259412Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20259412Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21259412Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22259412Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23259412Sluigi * SUCH DAMAGE.
24259412Sluigi */
25259412Sluigi
26259412Sluigi
27259412Sluigi/*
28259412Sluigi * This module implements the VALE switch for netmap
29259412Sluigi
30259412Sluigi--- VALE SWITCH ---
31259412Sluigi
32259412SluigiNMG_LOCK() serializes all modifications to switches and ports.
33259412SluigiA switch cannot be deleted until all ports are gone.
34259412Sluigi
35259412SluigiFor each switch, an SX lock (RWlock on linux) protects
36259412Sluigideletion of ports. When configuring or deleting a new port, the
37259412Sluigilock is acquired in exclusive mode (after holding NMG_LOCK).
38259412SluigiWhen forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
41259412SluigiHence it is important that sleepable shared locks are used.
42259412Sluigi
On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
45259412Sluigipackets are copied from source to destination, and then
46259412Sluigithe lock is acquired again and the receive ring is updated.
47259412Sluigi(A similar thing is done on the tx ring for NIC and host stack
48259412Sluigiports attached to the switch)
49259412Sluigi
50259412Sluigi */
51259412Sluigi
52259412Sluigi/*
53259412Sluigi * OS-specific code that is used only within this file.
54259412Sluigi * Other OS-specific code that must be accessed by drivers
55259412Sluigi * is present in netmap_kern.h
56259412Sluigi */
57259412Sluigi
58259412Sluigi#if defined(__FreeBSD__)
59259412Sluigi#include <sys/cdefs.h> /* prerequisite */
60259412Sluigi__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap_vale.c 270063 2014-08-16 15:00:01Z luigi $");
61259412Sluigi
62259412Sluigi#include <sys/types.h>
63259412Sluigi#include <sys/errno.h>
64259412Sluigi#include <sys/param.h>	/* defines used in kernel.h */
65259412Sluigi#include <sys/kernel.h>	/* types used in module initialization */
66259412Sluigi#include <sys/conf.h>	/* cdevsw struct, UID, GID */
67259412Sluigi#include <sys/sockio.h>
68259412Sluigi#include <sys/socketvar.h>	/* struct socket */
69259412Sluigi#include <sys/malloc.h>
70259412Sluigi#include <sys/poll.h>
71259412Sluigi#include <sys/rwlock.h>
72259412Sluigi#include <sys/socket.h> /* sockaddrs */
73259412Sluigi#include <sys/selinfo.h>
74259412Sluigi#include <sys/sysctl.h>
75259412Sluigi#include <net/if.h>
76259412Sluigi#include <net/if_var.h>
77259412Sluigi#include <net/bpf.h>		/* BIOCIMMEDIATE */
78259412Sluigi#include <machine/bus.h>	/* bus_dmamap_* */
79259412Sluigi#include <sys/endian.h>
80259412Sluigi#include <sys/refcount.h>
81259412Sluigi
82259412Sluigi
/* Type of the lock embedded in each bridge descriptor (bdg_lock). */
#define BDG_RWLOCK_T		struct rwlock

/* Setup and teardown of (b)->bdg_lock; initialised with RW_NOWITNESS. */
#define	BDG_RWINIT(b)		\
	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
#define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)

/* Writer side of (b)->bdg_lock. */
#define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
#define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)

/* Reader side of (b)->bdg_lock. */
#define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
#define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
#define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
93259412Sluigi
94259412Sluigi
95259412Sluigi#elif defined(linux)
96259412Sluigi
97259412Sluigi#include "bsd_glue.h"
98259412Sluigi
99259412Sluigi#elif defined(__APPLE__)
100259412Sluigi
101259412Sluigi#warning OSX support is only partial
102259412Sluigi#include "osx_glue.h"
103259412Sluigi
104259412Sluigi#else
105259412Sluigi
106259412Sluigi#error	Unsupported platform
107259412Sluigi
108259412Sluigi#endif /* unsupported */
109259412Sluigi
110259412Sluigi/*
111259412Sluigi * common headers
112259412Sluigi */
113259412Sluigi
114259412Sluigi#include <net/netmap.h>
115259412Sluigi#include <dev/netmap/netmap_kern.h>
116259412Sluigi#include <dev/netmap/netmap_mem2.h>
117259412Sluigi
118259412Sluigi#ifdef WITH_VALE
119259412Sluigi
120259412Sluigi/*
121259412Sluigi * system parameters (most of them in netmap_kern.h)
122259412Sluigi * NM_NAME	prefix for switch port names, default "vale"
123259412Sluigi * NM_BDG_MAXPORTS	number of ports
124259412Sluigi * NM_BRIDGES	max number of switches in the system.
125259412Sluigi *	XXX should become a sysctl or tunable
126259412Sluigi *
127259412Sluigi * Switch ports are named valeX:Y where X is the switch name and Y
128259412Sluigi * is the port. If Y matches a physical interface name, the port is
129259412Sluigi * connected to a physical device.
130259412Sluigi *
131259412Sluigi * Unlike physical interfaces, switch ports use their own memory region
132259412Sluigi * for rings and buffers.
133259412Sluigi * The virtual interfaces use per-queue lock instead of core lock.
134259412Sluigi * In the tx loop, we aggregate traffic in batches to make all operations
135259412Sluigi * faster. The batch size is bridge_batch.
136259412Sluigi */
/* Per-port limits. XXX it is unclear how large these should be. */
#define NM_BDG_MAXRINGS		16
#define NM_BDG_MAXSLOTS		4096
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */

/* Number of entries in the MAC forwarding table. */
#define NM_BDG_HASH		1024

/*
 * Forwarding buffer: NM_BDG_BATCH entries, plus room for one chain of
 * up to NM_MULTISEG buffers. NM_BDG_BATCH_MAX is the actual table size.
 */
#define NM_BDG_BATCH		1024
#define NM_MULTISEG		64
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)

/* Index value that terminates a list of slots in the ft. */
#define NM_FT_NULL		NM_BDG_BATCH_MAX

/* Number of bridges (size of the static nm_bridges[] array). */
#define	NM_BRIDGES		8
148259412Sluigi
149259412Sluigi
150259412Sluigi/*
151259412Sluigi * bridge_batch is set via sysctl to the max batch size to be
152259412Sluigi * used in the bridge. The actual value may be larger as the
153259412Sluigi * last packet in the block may overflow the size.
154259412Sluigi */
155259412Sluigiint bridge_batch = NM_BDG_BATCH; /* bridge batch size */
156259412SluigiSYSCTL_DECL(_dev_netmap);
157259412SluigiSYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
158259412Sluigi
159259412Sluigi
/* Forward declarations for routines referenced before their definition. */
static int netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp,
	struct netmap_vp_adapter **ret);
static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
static int netmap_bwrap_register(struct netmap_adapter *na, int onoff);
163259412Sluigi
/*
 * Per-destination list descriptor: one nm_bdg_q is used to build the
 * list for each output interface. bq_head and bq_tail delimit the list
 * (NM_FT_NULL when empty); bq_len counts output buffers, which may
 * differ from the number of input slots because of coalescing during
 * the copy.
 */
struct nm_bdg_q {
	uint16_t bq_head;	/* first entry of the list */
	uint16_t bq_tail;	/* last entry of the list */
	uint32_t bq_len;	/* number of buffers */
};
174259412Sluigi
/*
 * One entry of the bridge forwarding table.
 * XXX revise this
 */
struct nm_hash_ent {
	uint64_t	mac;	/* MAC address; the top 2 bytes are the epoch */
	uint64_t	ports;
};
180259412Sluigi
181259412Sluigi/*
182259412Sluigi * nm_bridge is a descriptor for a VALE switch.
183259412Sluigi * Interfaces for a bridge are all in bdg_ports[].
184259412Sluigi * The array has fixed size, an empty entry does not terminate
185259412Sluigi * the search, but lookups only occur on attach/detach so we
186259412Sluigi * don't mind if they are slow.
187259412Sluigi *
188259412Sluigi * The bridge is non blocking on the transmit ports: excess
189259412Sluigi * packets are dropped if there is no room on the output port.
190259412Sluigi *
191259412Sluigi * bdg_lock protects accesses to the bdg_ports array.
192259412Sluigi * This is a rw lock (or equivalent).
193259412Sluigi */
194259412Sluigistruct nm_bridge {
195259412Sluigi	/* XXX what is the proper alignment/layout ? */
196259412Sluigi	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
197259412Sluigi	int		bdg_namelen;
198259412Sluigi	uint32_t	bdg_active_ports; /* 0 means free */
199259412Sluigi	char		bdg_basename[IFNAMSIZ];
200259412Sluigi
201259412Sluigi	/* Indexes of active ports (up to active_ports)
202259412Sluigi	 * and all other remaining ports.
203259412Sluigi	 */
204259412Sluigi	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];
205259412Sluigi
206259412Sluigi	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
207259412Sluigi
208259412Sluigi
209259412Sluigi	/*
210259412Sluigi	 * The function to decide the destination port.
211259412Sluigi	 * It returns either of an index of the destination port,
212259412Sluigi	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
213259412Sluigi	 * forward this packet.  ring_nr is the source ring index, and the
214259412Sluigi	 * function may overwrite this value to forward this packet to a
215259412Sluigi	 * different ring index.
216259412Sluigi	 * This function must be set by netmap_bdgctl().
217259412Sluigi	 */
218270063Sluigi	struct netmap_bdg_ops bdg_ops;
219259412Sluigi
220259412Sluigi	/* the forwarding table, MAC+ports.
221259412Sluigi	 * XXX should be changed to an argument to be passed to
222259412Sluigi	 * the lookup function, and allocated on attach
223259412Sluigi	 */
224259412Sluigi	struct nm_hash_ent ht[NM_BDG_HASH];
225259412Sluigi};
226259412Sluigi
227270063Sluigiconst char*
228270063Sluiginetmap_bdg_name(struct netmap_vp_adapter *vp)
229270063Sluigi{
230270063Sluigi	struct nm_bridge *b = vp->na_bdg;
231270063Sluigi	if (b == NULL)
232270063Sluigi		return NULL;
233270063Sluigi	return b->bdg_basename;
234270063Sluigi}
235259412Sluigi
236270063Sluigi
237259412Sluigi/*
238259412Sluigi * XXX in principle nm_bridges could be created dynamically
239259412Sluigi * Right now we have a static array and deletions are protected
240259412Sluigi * by an exclusive lock.
241259412Sluigi */
242259412Sluigistruct nm_bridge nm_bridges[NM_BRIDGES];
243259412Sluigi
244259412Sluigi
/*
 * Slightly optimized packet copy. Note the argument order: source
 * first, destination second.
 *
 * Lengths of 1024 bytes or more are handed to memcpy() as they are.
 * Shorter lengths are copied in 64-byte chunks (eight 64-bit words
 * at a time), so the amount copied is l rounded up to a multiple of
 * 64. The caller must guarantee that both buffers have room for the
 * rounded-up size.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *from = _src;
	uint64_t *to = _dst;

	if (unlikely(l >= 1024)) {
		memcpy(to, from, l);
		return;
	}
	while (likely(l > 0)) {
		/* one 64-byte chunk, kept unrolled */
		to[0] = from[0];
		to[1] = from[1];
		to[2] = from[2];
		to[3] = from[3];
		to[4] = from[4];
		to[5] = from[5];
		to[6] = from[6];
		to[7] = from[7];
		from += 8;
		to += 8;
		l -= 64;
	}
}
273259412Sluigi
274259412Sluigi
275259412Sluigi/*
276259412Sluigi * locate a bridge among the existing ones.
277259412Sluigi * MUST BE CALLED WITH NMG_LOCK()
278259412Sluigi *
279259412Sluigi * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
280259412Sluigi * We assume that this is called with a name of at least NM_NAME chars.
281259412Sluigi */
282259412Sluigistatic struct nm_bridge *
283259412Sluiginm_find_bridge(const char *name, int create)
284259412Sluigi{
285259412Sluigi	int i, l, namelen;
286259412Sluigi	struct nm_bridge *b = NULL;
287259412Sluigi
288259412Sluigi	NMG_LOCK_ASSERT();
289259412Sluigi
290259412Sluigi	namelen = strlen(NM_NAME);	/* base length */
291259412Sluigi	l = name ? strlen(name) : 0;		/* actual length */
292259412Sluigi	if (l < namelen) {
293259412Sluigi		D("invalid bridge name %s", name ? name : NULL);
294259412Sluigi		return NULL;
295259412Sluigi	}
296259412Sluigi	for (i = namelen + 1; i < l; i++) {
297259412Sluigi		if (name[i] == ':') {
298259412Sluigi			namelen = i;
299259412Sluigi			break;
300259412Sluigi		}
301259412Sluigi	}
302259412Sluigi	if (namelen >= IFNAMSIZ)
303259412Sluigi		namelen = IFNAMSIZ;
304259412Sluigi	ND("--- prefix is '%.*s' ---", namelen, name);
305259412Sluigi
306259412Sluigi	/* lookup the name, remember empty slot if there is one */
307259412Sluigi	for (i = 0; i < NM_BRIDGES; i++) {
308259412Sluigi		struct nm_bridge *x = nm_bridges + i;
309259412Sluigi
310259412Sluigi		if (x->bdg_active_ports == 0) {
311259412Sluigi			if (create && b == NULL)
312259412Sluigi				b = x;	/* record empty slot */
313259412Sluigi		} else if (x->bdg_namelen != namelen) {
314259412Sluigi			continue;
315259412Sluigi		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
316259412Sluigi			ND("found '%.*s' at %d", namelen, name, i);
317259412Sluigi			b = x;
318259412Sluigi			break;
319259412Sluigi		}
320259412Sluigi	}
321259412Sluigi	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
322259412Sluigi		/* initialize the bridge */
323259412Sluigi		strncpy(b->bdg_basename, name, namelen);
324259412Sluigi		ND("create new bridge %s with ports %d", b->bdg_basename,
325259412Sluigi			b->bdg_active_ports);
326259412Sluigi		b->bdg_namelen = namelen;
327259412Sluigi		b->bdg_active_ports = 0;
328259412Sluigi		for (i = 0; i < NM_BDG_MAXPORTS; i++)
329259412Sluigi			b->bdg_port_index[i] = i;
330259412Sluigi		/* set the default function */
331270063Sluigi		b->bdg_ops.lookup = netmap_bdg_learning;
332259412Sluigi		/* reset the MAC address table */
333259412Sluigi		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
334259412Sluigi	}
335259412Sluigi	return b;
336259412Sluigi}
337259412Sluigi
338259412Sluigi
339259412Sluigi/*
340259412Sluigi * Free the forwarding tables for rings attached to switch ports.
341259412Sluigi */
342259412Sluigistatic void
343259412Sluiginm_free_bdgfwd(struct netmap_adapter *na)
344259412Sluigi{
345259412Sluigi	int nrings, i;
346259412Sluigi	struct netmap_kring *kring;
347259412Sluigi
348259412Sluigi	NMG_LOCK_ASSERT();
349260368Sluigi	nrings = na->num_tx_rings;
350260368Sluigi	kring = na->tx_rings;
351259412Sluigi	for (i = 0; i < nrings; i++) {
352259412Sluigi		if (kring[i].nkr_ft) {
353259412Sluigi			free(kring[i].nkr_ft, M_DEVBUF);
354259412Sluigi			kring[i].nkr_ft = NULL; /* protect from freeing twice */
355259412Sluigi		}
356259412Sluigi	}
357259412Sluigi}
358259412Sluigi
359259412Sluigi
360259412Sluigi/*
361259412Sluigi * Allocate the forwarding tables for the rings attached to the bridge ports.
362259412Sluigi */
363259412Sluigistatic int
364259412Sluiginm_alloc_bdgfwd(struct netmap_adapter *na)
365259412Sluigi{
366259412Sluigi	int nrings, l, i, num_dstq;
367259412Sluigi	struct netmap_kring *kring;
368259412Sluigi
369259412Sluigi	NMG_LOCK_ASSERT();
370259412Sluigi	/* all port:rings + broadcast */
371259412Sluigi	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
372259412Sluigi	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
373259412Sluigi	l += sizeof(struct nm_bdg_q) * num_dstq;
374259412Sluigi	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
375259412Sluigi
376261909Sluigi	nrings = netmap_real_tx_rings(na);
377259412Sluigi	kring = na->tx_rings;
378259412Sluigi	for (i = 0; i < nrings; i++) {
379259412Sluigi		struct nm_bdg_fwd *ft;
380259412Sluigi		struct nm_bdg_q *dstq;
381259412Sluigi		int j;
382259412Sluigi
383259412Sluigi		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
384259412Sluigi		if (!ft) {
385259412Sluigi			nm_free_bdgfwd(na);
386259412Sluigi			return ENOMEM;
387259412Sluigi		}
388259412Sluigi		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
389259412Sluigi		for (j = 0; j < num_dstq; j++) {
390259412Sluigi			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
391259412Sluigi			dstq[j].bq_len = 0;
392259412Sluigi		}
393259412Sluigi		kring[i].nkr_ft = ft;
394259412Sluigi	}
395259412Sluigi	return 0;
396259412Sluigi}
397259412Sluigi
398259412Sluigi
399270063Sluigi/* remove from bridge b the ports in slots hw and sw
400270063Sluigi * (sw can be -1 if not needed)
401270063Sluigi */
402259412Sluigistatic void
403259412Sluiginetmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
404259412Sluigi{
405259412Sluigi	int s_hw = hw, s_sw = sw;
406259412Sluigi	int i, lim =b->bdg_active_ports;
407259412Sluigi	uint8_t tmp[NM_BDG_MAXPORTS];
408259412Sluigi
409259412Sluigi	/*
410259412Sluigi	New algorithm:
411259412Sluigi	make a copy of bdg_port_index;
412259412Sluigi	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
413259412Sluigi	in the array of bdg_port_index, replacing them with
414259412Sluigi	entries from the bottom of the array;
415259412Sluigi	decrement bdg_active_ports;
416259412Sluigi	acquire BDG_WLOCK() and copy back the array.
417259412Sluigi	 */
418259412Sluigi
419261909Sluigi	if (netmap_verbose)
420261909Sluigi		D("detach %d and %d (lim %d)", hw, sw, lim);
421259412Sluigi	/* make a copy of the list of active ports, update it,
422259412Sluigi	 * and then copy back within BDG_WLOCK().
423259412Sluigi	 */
424259412Sluigi	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
425259412Sluigi	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
426259412Sluigi		if (hw >= 0 && tmp[i] == hw) {
427259412Sluigi			ND("detach hw %d at %d", hw, i);
428259412Sluigi			lim--; /* point to last active port */
429259412Sluigi			tmp[i] = tmp[lim]; /* swap with i */
430259412Sluigi			tmp[lim] = hw;	/* now this is inactive */
431259412Sluigi			hw = -1;
432259412Sluigi		} else if (sw >= 0 && tmp[i] == sw) {
433259412Sluigi			ND("detach sw %d at %d", sw, i);
434259412Sluigi			lim--;
435259412Sluigi			tmp[i] = tmp[lim];
436259412Sluigi			tmp[lim] = sw;
437259412Sluigi			sw = -1;
438259412Sluigi		} else {
439259412Sluigi			i++;
440259412Sluigi		}
441259412Sluigi	}
442259412Sluigi	if (hw >= 0 || sw >= 0) {
443259412Sluigi		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
444259412Sluigi	}
445259412Sluigi
446259412Sluigi	BDG_WLOCK(b);
447270063Sluigi	if (b->bdg_ops.dtor)
448270063Sluigi		b->bdg_ops.dtor(b->bdg_ports[s_hw]);
449259412Sluigi	b->bdg_ports[s_hw] = NULL;
450259412Sluigi	if (s_sw >= 0) {
451259412Sluigi		b->bdg_ports[s_sw] = NULL;
452259412Sluigi	}
453259412Sluigi	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
454259412Sluigi	b->bdg_active_ports = lim;
455259412Sluigi	BDG_WUNLOCK(b);
456259412Sluigi
457259412Sluigi	ND("now %d active ports", lim);
458259412Sluigi	if (lim == 0) {
459259412Sluigi		ND("marking bridge %s as free", b->bdg_basename);
460270063Sluigi		bzero(&b->bdg_ops, sizeof(b->bdg_ops));
461259412Sluigi	}
462259412Sluigi}
463259412Sluigi
464270063Sluigi/* nm_bdg_ctl callback for VALE ports */
465270063Sluigistatic int
466270063Sluiginetmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
467270063Sluigi{
468270063Sluigi	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
469270063Sluigi	struct nm_bridge *b = vpna->na_bdg;
470260368Sluigi
471270063Sluigi	if (attach)
472270063Sluigi		return 0; /* nothing to do */
473270063Sluigi	if (b) {
474270063Sluigi		netmap_set_all_rings(na, 0 /* disable */);
475270063Sluigi		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
476270063Sluigi		vpna->na_bdg = NULL;
477270063Sluigi		netmap_set_all_rings(na, 1 /* enable */);
478270063Sluigi	}
479270063Sluigi	/* I have took reference just for attach */
480270063Sluigi	netmap_adapter_put(na);
481270063Sluigi	return 0;
482270063Sluigi}
483270063Sluigi
484270063Sluigi/* nm_dtor callback for ephemeral VALE ports */
485259412Sluigistatic void
486270063Sluiginetmap_vp_dtor(struct netmap_adapter *na)
487259412Sluigi{
488259412Sluigi	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
489259412Sluigi	struct nm_bridge *b = vpna->na_bdg;
490259412Sluigi
491270063Sluigi	ND("%s has %d references", na->name, na->na_refcount);
492259412Sluigi
493259412Sluigi	if (b) {
494259412Sluigi		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
495259412Sluigi	}
496270063Sluigi}
497259412Sluigi
498270063Sluigi/* nm_dtor callback for persistent VALE ports */
499270063Sluigistatic void
500270063Sluiginetmap_persist_vp_dtor(struct netmap_adapter *na)
501270063Sluigi{
502270063Sluigi	struct ifnet *ifp = na->ifp;
503270063Sluigi
504270063Sluigi	netmap_vp_dtor(na);
505259412Sluigi	na->ifp = NULL;
506270063Sluigi	nm_vi_detach(ifp);
507259412Sluigi}
508259412Sluigi
509270063Sluigi/* remove a persistent VALE port from the system */
510270063Sluigistatic int
511270063Sluiginm_vi_destroy(const char *name)
512270063Sluigi{
513270063Sluigi	struct ifnet *ifp;
514270063Sluigi	int error;
515260368Sluigi
516270063Sluigi	ifp = ifunit_ref(name);
517270063Sluigi	if (!ifp)
518270063Sluigi		return ENXIO;
519270063Sluigi	NMG_LOCK();
520270063Sluigi	/* make sure this is actually a VALE port */
521270063Sluigi	if (!NETMAP_CAPABLE(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
522270063Sluigi		error = EINVAL;
523270063Sluigi		goto err;
524270063Sluigi	}
525270063Sluigi
526270063Sluigi	if (NA(ifp)->na_refcount > 1) {
527270063Sluigi		error = EBUSY;
528270063Sluigi		goto err;
529270063Sluigi	}
530270063Sluigi	NMG_UNLOCK();
531270063Sluigi
532270063Sluigi	D("destroying a persistent vale interface %s", ifp->if_xname);
533270063Sluigi	/* Linux requires all the references are released
534270063Sluigi	 * before unregister
535270063Sluigi	 */
536270063Sluigi	if_rele(ifp);
537270063Sluigi	netmap_detach(ifp);
538270063Sluigi	return 0;
539270063Sluigi
540270063Sluigierr:
541270063Sluigi	NMG_UNLOCK();
542270063Sluigi	if_rele(ifp);
543270063Sluigi	return error;
544270063Sluigi}
545270063Sluigi
546270063Sluigi/*
547270063Sluigi * Create a virtual interface registered to the system.
548270063Sluigi * The interface will be attached to a bridge later.
549270063Sluigi */
550270063Sluigistatic int
551270063Sluiginm_vi_create(struct nmreq *nmr)
552270063Sluigi{
553270063Sluigi	struct ifnet *ifp;
554270063Sluigi	struct netmap_vp_adapter *vpna;
555270063Sluigi	int error;
556270063Sluigi
557270063Sluigi	/* don't include VALE prefix */
558270063Sluigi	if (!strncmp(nmr->nr_name, NM_NAME, strlen(NM_NAME)))
559270063Sluigi		return EINVAL;
560270063Sluigi	ifp = ifunit_ref(nmr->nr_name);
561270063Sluigi	if (ifp) { /* already exist, cannot create new one */
562270063Sluigi		if_rele(ifp);
563270063Sluigi		return EEXIST;
564270063Sluigi	}
565270063Sluigi	error = nm_vi_persist(nmr->nr_name, &ifp);
566270063Sluigi	if (error)
567270063Sluigi		return error;
568270063Sluigi
569270063Sluigi	NMG_LOCK();
570270063Sluigi	/* netmap_vp_create creates a struct netmap_vp_adapter */
571270063Sluigi	error = netmap_vp_create(nmr, ifp, &vpna);
572270063Sluigi	if (error) {
573270063Sluigi		D("error %d", error);
574270063Sluigi		nm_vi_detach(ifp);
575270063Sluigi		return error;
576270063Sluigi	}
577270063Sluigi	/* persist-specific routines */
578270063Sluigi	vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
579270063Sluigi	vpna->up.nm_dtor = netmap_persist_vp_dtor;
580270063Sluigi	netmap_adapter_get(&vpna->up);
581270063Sluigi	NMG_UNLOCK();
582270063Sluigi	D("created %s", ifp->if_xname);
583270063Sluigi	return 0;
584270063Sluigi}
585270063Sluigi
586260368Sluigi/* Try to get a reference to a netmap adapter attached to a VALE switch.
587260368Sluigi * If the adapter is found (or is created), this function returns 0, a
588260368Sluigi * non NULL pointer is returned into *na, and the caller holds a
589260368Sluigi * reference to the adapter.
590260368Sluigi * If an adapter is not found, then no reference is grabbed and the
591260368Sluigi * function returns an error code, or 0 if there is just a VALE prefix
592260368Sluigi * mismatch. Therefore the caller holds a reference when
593260368Sluigi * (*na != NULL && return == 0).
594260368Sluigi */
595259412Sluigiint
596259412Sluiginetmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
597259412Sluigi{
598270063Sluigi	char *nr_name = nmr->nr_name;
599270063Sluigi	const char *ifname;
600259412Sluigi	struct ifnet *ifp;
601259412Sluigi	int error = 0;
602270063Sluigi	struct netmap_vp_adapter *vpna, *hostna = NULL;
603259412Sluigi	struct nm_bridge *b;
604259412Sluigi	int i, j, cand = -1, cand2 = -1;
605259412Sluigi	int needed;
606259412Sluigi
607259412Sluigi	*na = NULL;     /* default return value */
608259412Sluigi
609259412Sluigi	/* first try to see if this is a bridge port. */
610259412Sluigi	NMG_LOCK_ASSERT();
611270063Sluigi	if (strncmp(nr_name, NM_NAME, sizeof(NM_NAME) - 1)) {
612259412Sluigi		return 0;  /* no error, but no VALE prefix */
613259412Sluigi	}
614259412Sluigi
615270063Sluigi	b = nm_find_bridge(nr_name, create);
616259412Sluigi	if (b == NULL) {
617270063Sluigi		D("no bridges available for '%s'", nr_name);
618260700Sluigi		return (create ? ENOMEM : ENXIO);
619259412Sluigi	}
620270063Sluigi	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
621270063Sluigi		panic("x");
622259412Sluigi
623259412Sluigi	/* Now we are sure that name starts with the bridge's name,
624259412Sluigi	 * lookup the port in the bridge. We need to scan the entire
625259412Sluigi	 * list. It is not important to hold a WLOCK on the bridge
626259412Sluigi	 * during the search because NMG_LOCK already guarantees
627259412Sluigi	 * that there are no other possible writers.
628259412Sluigi	 */
629259412Sluigi
630259412Sluigi	/* lookup in the local list of ports */
631259412Sluigi	for (j = 0; j < b->bdg_active_ports; j++) {
632259412Sluigi		i = b->bdg_port_index[j];
633259412Sluigi		vpna = b->bdg_ports[i];
634259412Sluigi		// KASSERT(na != NULL);
635270063Sluigi		D("checking %s", vpna->up.name);
636270063Sluigi		if (!strcmp(vpna->up.name, nr_name)) {
637259412Sluigi			netmap_adapter_get(&vpna->up);
638270063Sluigi			ND("found existing if %s refs %d", nr_name)
639270063Sluigi			*na = &vpna->up;
640259412Sluigi			return 0;
641259412Sluigi		}
642259412Sluigi	}
643259412Sluigi	/* not found, should we create it? */
644259412Sluigi	if (!create)
645259412Sluigi		return ENXIO;
646259412Sluigi	/* yes we should, see if we have space to attach entries */
647259412Sluigi	needed = 2; /* in some cases we only need 1 */
648259412Sluigi	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
649259412Sluigi		D("bridge full %d, cannot create new port", b->bdg_active_ports);
650260700Sluigi		return ENOMEM;
651259412Sluigi	}
652259412Sluigi	/* record the next two ports available, but do not allocate yet */
653259412Sluigi	cand = b->bdg_port_index[b->bdg_active_ports];
654259412Sluigi	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
655259412Sluigi	ND("+++ bridge %s port %s used %d avail %d %d",
656270063Sluigi		b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
657259412Sluigi
658259412Sluigi	/*
659259412Sluigi	 * try see if there is a matching NIC with this name
660259412Sluigi	 * (after the bridge's name)
661259412Sluigi	 */
662270063Sluigi	ifname = nr_name + b->bdg_namelen + 1;
663270063Sluigi	ifp = ifunit_ref(ifname);
664270063Sluigi	if (!ifp) {
665270063Sluigi		/* Create an ephemeral virtual port
666270063Sluigi		 * This block contains all the ephemeral-specific logics
667270063Sluigi		 */
668259412Sluigi		if (nmr->nr_cmd) {
669259412Sluigi			/* nr_cmd must be 0 for a virtual port */
670259412Sluigi			return EINVAL;
671259412Sluigi		}
672259412Sluigi
673259412Sluigi		/* bdg_netmap_attach creates a struct netmap_adapter */
674270063Sluigi		error = netmap_vp_create(nmr, NULL, &vpna);
675259412Sluigi		if (error) {
676259412Sluigi			D("error %d", error);
677259412Sluigi			free(ifp, M_DEVBUF);
678259412Sluigi			return error;
679259412Sluigi		}
680270063Sluigi		/* shortcut - we can skip get_hw_na(),
681270063Sluigi		 * ownership check and nm_bdg_attach()
682270063Sluigi		 */
683270063Sluigi	} else {
684270063Sluigi		struct netmap_adapter *hw;
685259412Sluigi
686270063Sluigi		error = netmap_get_hw_na(ifp, &hw);
687270063Sluigi		if (error || hw == NULL)
688259412Sluigi			goto out;
689259412Sluigi
690270063Sluigi		/* host adapter might not be created */
691270063Sluigi		error = hw->nm_bdg_attach(nr_name, hw);
692270063Sluigi		if (error)
693259412Sluigi			goto out;
694270063Sluigi		vpna = hw->na_vp;
695270063Sluigi		hostna = hw->na_hostvp;
696270063Sluigi		if_rele(ifp);
697259412Sluigi		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
698270063Sluigi			hostna = NULL;
699259412Sluigi	}
700259412Sluigi
701259412Sluigi	BDG_WLOCK(b);
702259412Sluigi	vpna->bdg_port = cand;
703259412Sluigi	ND("NIC  %p to bridge port %d", vpna, cand);
704259412Sluigi	/* bind the port to the bridge (virtual ports are not active) */
705259412Sluigi	b->bdg_ports[cand] = vpna;
706259412Sluigi	vpna->na_bdg = b;
707259412Sluigi	b->bdg_active_ports++;
708270063Sluigi	if (hostna != NULL) {
709259412Sluigi		/* also bind the host stack to the bridge */
710259412Sluigi		b->bdg_ports[cand2] = hostna;
711259412Sluigi		hostna->bdg_port = cand2;
712259412Sluigi		hostna->na_bdg = b;
713259412Sluigi		b->bdg_active_ports++;
714259412Sluigi		ND("host %p to bridge port %d", hostna, cand2);
715259412Sluigi	}
716270063Sluigi	ND("if %s refs %d", ifname, vpna->up.na_refcount);
717259412Sluigi	BDG_WUNLOCK(b);
718270063Sluigi	*na = &vpna->up;
719270063Sluigi	netmap_adapter_get(*na);
720259412Sluigi	return 0;
721259412Sluigi
722259412Sluigiout:
723259412Sluigi	if_rele(ifp);
724259412Sluigi
725259412Sluigi	return error;
726259412Sluigi}
727259412Sluigi
728259412Sluigi
729270063Sluigi/* Process NETMAP_BDG_ATTACH */
730259412Sluigistatic int
731270063Sluiginm_bdg_ctl_attach(struct nmreq *nmr)
732259412Sluigi{
733259412Sluigi	struct netmap_adapter *na;
734259412Sluigi	int error;
735259412Sluigi
736259412Sluigi	NMG_LOCK();
737260700Sluigi
738260368Sluigi	error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
739270063Sluigi	if (error) /* no device */
740259412Sluigi		goto unlock_exit;
741260700Sluigi
742260368Sluigi	if (na == NULL) { /* VALE prefix missing */
743259412Sluigi		error = EINVAL;
744260368Sluigi		goto unlock_exit;
745259412Sluigi	}
746259412Sluigi
747270063Sluigi	if (NETMAP_OWNED_BY_ANY(na)) {
748259412Sluigi		error = EBUSY;
749259412Sluigi		goto unref_exit;
750259412Sluigi	}
751259412Sluigi
752270063Sluigi	if (na->nm_bdg_ctl) {
753270063Sluigi		/* nop for VALE ports. The bwrap needs to put the hwna
754270063Sluigi		 * in netmap mode (see netmap_bwrap_bdg_ctl)
755270063Sluigi		 */
756270063Sluigi		error = na->nm_bdg_ctl(na, nmr, 1);
757270063Sluigi		if (error)
758270063Sluigi			goto unref_exit;
759270063Sluigi		ND("registered %s to netmap-mode", na->name);
760259412Sluigi	}
761259412Sluigi	NMG_UNLOCK();
762259412Sluigi	return 0;
763259412Sluigi
764259412Sluigiunref_exit:
765259412Sluigi	netmap_adapter_put(na);
766259412Sluigiunlock_exit:
767259412Sluigi	NMG_UNLOCK();
768259412Sluigi	return error;
769259412Sluigi}
770259412Sluigi
771260368Sluigi
772270063Sluigi/* process NETMAP_BDG_DETACH */
773259412Sluigistatic int
774270063Sluiginm_bdg_ctl_detach(struct nmreq *nmr)
775259412Sluigi{
776259412Sluigi	struct netmap_adapter *na;
777259412Sluigi	int error;
778259412Sluigi
779259412Sluigi	NMG_LOCK();
780260368Sluigi	error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
781259412Sluigi	if (error) { /* no device, or another bridge or user owns the device */
782259412Sluigi		goto unlock_exit;
783259412Sluigi	}
784260700Sluigi
785260368Sluigi	if (na == NULL) { /* VALE prefix missing */
786259412Sluigi		error = EINVAL;
787260368Sluigi		goto unlock_exit;
788259412Sluigi	}
789260368Sluigi
790270063Sluigi	if (na->nm_bdg_ctl) {
791270063Sluigi		/* remove the port from bridge. The bwrap
792270063Sluigi		 * also needs to put the hwna in normal mode
793270063Sluigi		 */
794270063Sluigi		error = na->nm_bdg_ctl(na, nmr, 0);
795259412Sluigi	}
796259412Sluigi
797259412Sluigi	netmap_adapter_put(na);
798259412Sluigiunlock_exit:
799259412Sluigi	NMG_UNLOCK();
800259412Sluigi	return error;
801259412Sluigi
802259412Sluigi}
803259412Sluigi
804259412Sluigi
805270063Sluigi/* Called by either user's context (netmap_ioctl())
806270063Sluigi * or external kernel modules (e.g., Openvswitch).
807270063Sluigi * Operation is indicated in nmr->nr_cmd.
808270063Sluigi * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge
809270063Sluigi * requires bdg_ops argument; the other commands ignore this argument.
810270063Sluigi *
811259412Sluigi * Called without NMG_LOCK.
812259412Sluigi */
813259412Sluigiint
814270063Sluiginetmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
815259412Sluigi{
816259412Sluigi	struct nm_bridge *b;
817259412Sluigi	struct netmap_adapter *na;
818259412Sluigi	struct netmap_vp_adapter *vpna;
819259412Sluigi	char *name = nmr->nr_name;
820259412Sluigi	int cmd = nmr->nr_cmd, namelen = strlen(name);
821259412Sluigi	int error = 0, i, j;
822259412Sluigi
823259412Sluigi	switch (cmd) {
824270063Sluigi	case NETMAP_BDG_NEWIF:
825270063Sluigi		error = nm_vi_create(nmr);
826270063Sluigi		break;
827270063Sluigi
828270063Sluigi	case NETMAP_BDG_DELIF:
829270063Sluigi		error = nm_vi_destroy(nmr->nr_name);
830270063Sluigi		break;
831270063Sluigi
832259412Sluigi	case NETMAP_BDG_ATTACH:
833270063Sluigi		error = nm_bdg_ctl_attach(nmr);
834259412Sluigi		break;
835259412Sluigi
836259412Sluigi	case NETMAP_BDG_DETACH:
837270063Sluigi		error = nm_bdg_ctl_detach(nmr);
838259412Sluigi		break;
839259412Sluigi
840259412Sluigi	case NETMAP_BDG_LIST:
841259412Sluigi		/* this is used to enumerate bridges and ports */
842259412Sluigi		if (namelen) { /* look up indexes of bridge and port */
843259412Sluigi			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
844259412Sluigi				error = EINVAL;
845259412Sluigi				break;
846259412Sluigi			}
847259412Sluigi			NMG_LOCK();
848259412Sluigi			b = nm_find_bridge(name, 0 /* don't create */);
849259412Sluigi			if (!b) {
850259412Sluigi				error = ENOENT;
851259412Sluigi				NMG_UNLOCK();
852259412Sluigi				break;
853259412Sluigi			}
854259412Sluigi
855270063Sluigi			name = name + b->bdg_namelen + 1;
856259412Sluigi			error = ENOENT;
857259412Sluigi			for (j = 0; j < b->bdg_active_ports; j++) {
858259412Sluigi				i = b->bdg_port_index[j];
859259412Sluigi				vpna = b->bdg_ports[i];
860259412Sluigi				if (vpna == NULL) {
861259412Sluigi					D("---AAAAAAAAARGH-------");
862259412Sluigi					continue;
863259412Sluigi				}
864259412Sluigi				/* the former and the latter identify a
865259412Sluigi				 * virtual port and a NIC, respectively
866259412Sluigi				 */
867270063Sluigi				if (!strcmp(vpna->up.name, name)) {
868259412Sluigi					/* bridge index */
869259412Sluigi					nmr->nr_arg1 = b - nm_bridges;
870259412Sluigi					nmr->nr_arg2 = i; /* port index */
871259412Sluigi					error = 0;
872259412Sluigi					break;
873259412Sluigi				}
874259412Sluigi			}
875259412Sluigi			NMG_UNLOCK();
876259412Sluigi		} else {
877259412Sluigi			/* return the first non-empty entry starting from
878259412Sluigi			 * bridge nr_arg1 and port nr_arg2.
879259412Sluigi			 *
880259412Sluigi			 * Users can detect the end of the same bridge by
881259412Sluigi			 * seeing the new and old value of nr_arg1, and can
882259412Sluigi			 * detect the end of all the bridge by error != 0
883259412Sluigi			 */
884259412Sluigi			i = nmr->nr_arg1;
885259412Sluigi			j = nmr->nr_arg2;
886259412Sluigi
887259412Sluigi			NMG_LOCK();
888259412Sluigi			for (error = ENOENT; i < NM_BRIDGES; i++) {
889259412Sluigi				b = nm_bridges + i;
890259412Sluigi				if (j >= b->bdg_active_ports) {
891259412Sluigi					j = 0; /* following bridges scan from 0 */
892259412Sluigi					continue;
893259412Sluigi				}
894259412Sluigi				nmr->nr_arg1 = i;
895259412Sluigi				nmr->nr_arg2 = j;
896259412Sluigi				j = b->bdg_port_index[j];
897259412Sluigi				vpna = b->bdg_ports[j];
898270063Sluigi				strncpy(name, vpna->up.name, (size_t)IFNAMSIZ);
899259412Sluigi				error = 0;
900259412Sluigi				break;
901259412Sluigi			}
902259412Sluigi			NMG_UNLOCK();
903259412Sluigi		}
904259412Sluigi		break;
905259412Sluigi
906270063Sluigi	case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */
907270063Sluigi		/* register callbacks to the given bridge.
908259412Sluigi		 * nmr->nr_name may be just bridge's name (including ':'
909259412Sluigi		 * if it is not just NM_NAME).
910259412Sluigi		 */
911270063Sluigi		if (!bdg_ops) {
912259412Sluigi			error = EINVAL;
913259412Sluigi			break;
914259412Sluigi		}
915259412Sluigi		NMG_LOCK();
916259412Sluigi		b = nm_find_bridge(name, 0 /* don't create */);
917259412Sluigi		if (!b) {
918259412Sluigi			error = EINVAL;
919259412Sluigi		} else {
920270063Sluigi			b->bdg_ops = *bdg_ops;
921259412Sluigi		}
922259412Sluigi		NMG_UNLOCK();
923259412Sluigi		break;
924259412Sluigi
925261909Sluigi	case NETMAP_BDG_VNET_HDR:
926261909Sluigi		/* Valid lengths for the virtio-net header are 0 (no header),
927261909Sluigi		   10 and 12. */
928261909Sluigi		if (nmr->nr_arg1 != 0 &&
929261909Sluigi			nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) &&
930261909Sluigi				nmr->nr_arg1 != 12) {
931261909Sluigi			error = EINVAL;
932261909Sluigi			break;
933261909Sluigi		}
934259412Sluigi		NMG_LOCK();
935259412Sluigi		error = netmap_get_bdg_na(nmr, &na, 0);
936260368Sluigi		if (na && !error) {
937259412Sluigi			vpna = (struct netmap_vp_adapter *)na;
938261909Sluigi			vpna->virt_hdr_len = nmr->nr_arg1;
939261909Sluigi			if (vpna->virt_hdr_len)
940270063Sluigi				vpna->mfs = NETMAP_BUF_SIZE(na);
941261909Sluigi			D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna);
942260368Sluigi			netmap_adapter_put(na);
943259412Sluigi		}
944259412Sluigi		NMG_UNLOCK();
945259412Sluigi		break;
946259412Sluigi
947259412Sluigi	default:
948259412Sluigi		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
949259412Sluigi		error = EINVAL;
950259412Sluigi		break;
951259412Sluigi	}
952259412Sluigi	return error;
953259412Sluigi}
954259412Sluigi
955270063Sluigiint
956270063Sluiginetmap_bdg_config(struct nmreq *nmr)
957270063Sluigi{
958270063Sluigi	struct nm_bridge *b;
959270063Sluigi	int error = EINVAL;
960270063Sluigi
961270063Sluigi	NMG_LOCK();
962270063Sluigi	b = nm_find_bridge(nmr->nr_name, 0);
963270063Sluigi	if (!b) {
964270063Sluigi		NMG_UNLOCK();
965270063Sluigi		return error;
966270063Sluigi	}
967270063Sluigi	NMG_UNLOCK();
968270063Sluigi	/* Don't call config() with NMG_LOCK() held */
969270063Sluigi	BDG_RLOCK(b);
970270063Sluigi	if (b->bdg_ops.config != NULL)
971270063Sluigi		error = b->bdg_ops.config((struct nm_ifreq *)nmr);
972270063Sluigi	BDG_RUNLOCK(b);
973270063Sluigi	return error;
974270063Sluigi}
975270063Sluigi
976270063Sluigi
977270063Sluigi/* nm_krings_create callback for VALE ports.
978270063Sluigi * Calls the standard netmap_krings_create, then adds leases on rx
979270063Sluigi * rings and bdgfwd on tx rings.
980270063Sluigi */
981259412Sluigistatic int
982259412Sluiginetmap_vp_krings_create(struct netmap_adapter *na)
983259412Sluigi{
984261909Sluigi	u_int tailroom;
985259412Sluigi	int error, i;
986259412Sluigi	uint32_t *leases;
987261909Sluigi	u_int nrx = netmap_real_rx_rings(na);
988259412Sluigi
989259412Sluigi	/*
990259412Sluigi	 * Leases are attached to RX rings on vale ports
991259412Sluigi	 */
992259412Sluigi	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
993259412Sluigi
994261909Sluigi	error = netmap_krings_create(na, tailroom);
995259412Sluigi	if (error)
996259412Sluigi		return error;
997259412Sluigi
998259412Sluigi	leases = na->tailroom;
999259412Sluigi
1000259412Sluigi	for (i = 0; i < nrx; i++) { /* Receive rings */
1001259412Sluigi		na->rx_rings[i].nkr_leases = leases;
1002259412Sluigi		leases += na->num_rx_desc;
1003259412Sluigi	}
1004259412Sluigi
1005259412Sluigi	error = nm_alloc_bdgfwd(na);
1006259412Sluigi	if (error) {
1007259412Sluigi		netmap_krings_delete(na);
1008259412Sluigi		return error;
1009259412Sluigi	}
1010259412Sluigi
1011259412Sluigi	return 0;
1012259412Sluigi}
1013259412Sluigi
1014260368Sluigi
/*
 * nm_krings_delete callback for VALE ports.
 *
 * Undoes netmap_vp_krings_create() in reverse order: first the
 * forwarding tables, then the krings themselves.
 */
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);	/* release the forwarding tables first */
	netmap_krings_delete(na);
}
1022259412Sluigi
1023259412Sluigi
1024259412Sluigistatic int
1025259412Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
1026259412Sluigi	struct netmap_vp_adapter *na, u_int ring_nr);
1027259412Sluigi
1028259412Sluigi
1029259412Sluigi/*
1030270063Sluigi * main dispatch routine for the bridge.
1031259412Sluigi * Grab packets from a kring, move them into the ft structure
1032259412Sluigi * associated to the tx (input) port. Max one instance per port,
1033259412Sluigi * filtered on input (ioctl, poll or XXX).
1034259412Sluigi * Returns the next position in the ring.
1035259412Sluigi */
1036259412Sluigistatic int
1037270063Sluiginm_bdg_preflush(struct netmap_kring *kring, u_int end)
1038259412Sluigi{
1039270063Sluigi	struct netmap_vp_adapter *na =
1040270063Sluigi		(struct netmap_vp_adapter*)kring->na;
1041259412Sluigi	struct netmap_ring *ring = kring->ring;
1042259412Sluigi	struct nm_bdg_fwd *ft;
1043270063Sluigi	u_int ring_nr = kring->ring_id;
1044259412Sluigi	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
1045259412Sluigi	u_int ft_i = 0;	/* start from 0 */
1046259412Sluigi	u_int frags = 1; /* how many frags ? */
1047259412Sluigi	struct nm_bridge *b = na->na_bdg;
1048259412Sluigi
1049259412Sluigi	/* To protect against modifications to the bridge we acquire a
1050259412Sluigi	 * shared lock, waiting if we can sleep (if the source port is
1051259412Sluigi	 * attached to a user process) or with a trylock otherwise (NICs).
1052259412Sluigi	 */
1053259412Sluigi	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1054259412Sluigi	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
1055259412Sluigi		BDG_RLOCK(b);
1056259412Sluigi	else if (!BDG_RTRYLOCK(b))
1057259412Sluigi		return 0;
1058259412Sluigi	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1059259412Sluigi	ft = kring->nkr_ft;
1060259412Sluigi
1061259412Sluigi	for (; likely(j != end); j = nm_next(j, lim)) {
1062259412Sluigi		struct netmap_slot *slot = &ring->slot[j];
1063259412Sluigi		char *buf;
1064259412Sluigi
1065259412Sluigi		ft[ft_i].ft_len = slot->len;
1066259412Sluigi		ft[ft_i].ft_flags = slot->flags;
1067259412Sluigi
1068259412Sluigi		ND("flags is 0x%x", slot->flags);
1069259412Sluigi		/* this slot goes into a list so initialize the link field */
1070259412Sluigi		ft[ft_i].ft_next = NM_FT_NULL;
1071259412Sluigi		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
1072270063Sluigi			(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
1073267151Sluigi		if (unlikely(buf == NULL)) {
1074267151Sluigi			RD(5, "NULL %s buffer pointer from %s slot %d len %d",
1075267151Sluigi				(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
1076267151Sluigi				kring->name, j, ft[ft_i].ft_len);
1077270063Sluigi			buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
1078267151Sluigi			ft[ft_i].ft_len = 0;
1079267151Sluigi			ft[ft_i].ft_flags = 0;
1080267151Sluigi		}
1081259487Sluigi		__builtin_prefetch(buf);
1082259412Sluigi		++ft_i;
1083259412Sluigi		if (slot->flags & NS_MOREFRAG) {
1084259412Sluigi			frags++;
1085259412Sluigi			continue;
1086259412Sluigi		}
1087259412Sluigi		if (unlikely(netmap_verbose && frags > 1))
1088259412Sluigi			RD(5, "%d frags at %d", frags, ft_i - frags);
1089259412Sluigi		ft[ft_i - frags].ft_frags = frags;
1090259412Sluigi		frags = 1;
1091259412Sluigi		if (unlikely((int)ft_i >= bridge_batch))
1092259412Sluigi			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1093259412Sluigi	}
1094259412Sluigi	if (frags > 1) {
1095259412Sluigi		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1096259412Sluigi		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
1097259412Sluigi		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
1098259412Sluigi		ft[ft_i - frags].ft_frags = frags - 1;
1099259412Sluigi	}
1100259412Sluigi	if (ft_i)
1101259412Sluigi		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1102259412Sluigi	BDG_RUNLOCK(b);
1103259412Sluigi	return j;
1104259412Sluigi}
1105259412Sluigi
1106259412Sluigi
1107259412Sluigi/* ----- FreeBSD if_bridge hash function ------- */
1108259412Sluigi
1109259412Sluigi/*
1110259412Sluigi * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1111259412Sluigi * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1112259412Sluigi *
1113259412Sluigi * http://www.burtleburtle.net/bob/hash/spooky.html
1114259412Sluigi */
1115259412Sluigi#define mix(a, b, c)                                                    \
1116259412Sluigido {                                                                    \
1117259412Sluigi        a -= b; a -= c; a ^= (c >> 13);                                 \
1118259412Sluigi        b -= c; b -= a; b ^= (a << 8);                                  \
1119259412Sluigi        c -= a; c -= b; c ^= (b >> 13);                                 \
1120259412Sluigi        a -= b; a -= c; a ^= (c >> 12);                                 \
1121259412Sluigi        b -= c; b -= a; b ^= (a << 16);                                 \
1122259412Sluigi        c -= a; c -= b; c ^= (b >> 5);                                  \
1123259412Sluigi        a -= b; a -= c; a ^= (c >> 3);                                  \
1124259412Sluigi        b -= c; b -= a; b ^= (a << 10);                                 \
1125259412Sluigi        c -= a; c -= b; c ^= (b >> 15);                                 \
1126259412Sluigi} while (/*CONSTCOND*/0)
1127259412Sluigi
1128260368Sluigi
1129259412Sluigistatic __inline uint32_t
1130259412Sluiginm_bridge_rthash(const uint8_t *addr)
1131259412Sluigi{
1132259412Sluigi        uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
1133259412Sluigi
1134259412Sluigi        b += addr[5] << 8;
1135259412Sluigi        b += addr[4];
1136259412Sluigi        a += addr[3] << 24;
1137259412Sluigi        a += addr[2] << 16;
1138259412Sluigi        a += addr[1] << 8;
1139259412Sluigi        a += addr[0];
1140259412Sluigi
1141259412Sluigi        mix(a, b, c);
1142259412Sluigi#define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
1143259412Sluigi        return (c & BRIDGE_RTHASH_MASK);
1144259412Sluigi}
1145259412Sluigi
1146259412Sluigi#undef mix
1147259412Sluigi
1148259412Sluigi
1149270063Sluigi/* nm_register callback for VALE ports */
1150259412Sluigistatic int
1151270063Sluiginetmap_vp_reg(struct netmap_adapter *na, int onoff)
1152259412Sluigi{
1153259412Sluigi	struct netmap_vp_adapter *vpna =
1154259412Sluigi		(struct netmap_vp_adapter*)na;
1155259412Sluigi
1156270063Sluigi	/* persistent ports may be put in netmap mode
1157270063Sluigi	 * before being attached to a bridge
1158259412Sluigi	 */
1159270063Sluigi	if (vpna->na_bdg)
1160270063Sluigi		BDG_WLOCK(vpna->na_bdg);
1161259412Sluigi	if (onoff) {
1162270063Sluigi		na->na_flags |= NAF_NETMAP_ON;
1163270063Sluigi		 /* XXX on FreeBSD, persistent VALE ports should also
1164270063Sluigi		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
1165270063Sluigi		 */
1166259412Sluigi	} else {
1167270063Sluigi		na->na_flags &= ~NAF_NETMAP_ON;
1168259412Sluigi	}
1169270063Sluigi	if (vpna->na_bdg)
1170270063Sluigi		BDG_WUNLOCK(vpna->na_bdg);
1171259412Sluigi	return 0;
1172259412Sluigi}
1173259412Sluigi
1174259412Sluigi
1175259412Sluigi/*
1176259412Sluigi * Lookup function for a learning bridge.
1177259412Sluigi * Update the hash table with the source address,
1178259412Sluigi * and then returns the destination port index, and the
1179259412Sluigi * ring in *dst_ring (at the moment, always use ring 0)
1180259412Sluigi */
1181259412Sluigiu_int
1182270063Sluiginetmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
1183270063Sluigi		const struct netmap_vp_adapter *na)
1184259412Sluigi{
1185270063Sluigi	uint8_t *buf = ft->ft_buf;
1186270063Sluigi	u_int buf_len = ft->ft_len;
1187259412Sluigi	struct nm_hash_ent *ht = na->na_bdg->ht;
1188259412Sluigi	uint32_t sh, dh;
1189259412Sluigi	u_int dst, mysrc = na->bdg_port;
1190259412Sluigi	uint64_t smac, dmac;
1191259412Sluigi
1192270063Sluigi	/* safety check, unfortunately we have many cases */
1193270063Sluigi	if (buf_len >= 14 + na->virt_hdr_len) {
1194270063Sluigi		/* virthdr + mac_hdr in the same slot */
1195270063Sluigi		buf += na->virt_hdr_len;
1196270063Sluigi		buf_len -= na->virt_hdr_len;
1197270063Sluigi	} else if (buf_len == na->virt_hdr_len && ft->ft_flags & NS_MOREFRAG) {
1198270063Sluigi		/* only header in first fragment */
1199270063Sluigi		ft++;
1200270063Sluigi		buf = ft->ft_buf;
1201270063Sluigi		buf_len = ft->ft_len;
1202270063Sluigi	} else {
1203270063Sluigi		RD(5, "invalid buf format, length %d", buf_len);
1204259412Sluigi		return NM_BDG_NOPORT;
1205259412Sluigi	}
1206259412Sluigi	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1207259412Sluigi	smac = le64toh(*(uint64_t *)(buf + 4));
1208259412Sluigi	smac >>= 16;
1209259412Sluigi
1210259412Sluigi	/*
1211259412Sluigi	 * The hash is somewhat expensive, there might be some
1212259412Sluigi	 * worthwhile optimizations here.
1213259412Sluigi	 */
1214259412Sluigi	if ((buf[6] & 1) == 0) { /* valid src */
1215259412Sluigi		uint8_t *s = buf+6;
1216259412Sluigi		sh = nm_bridge_rthash(s); // XXX hash of source
1217259412Sluigi		/* update source port forwarding entry */
1218259412Sluigi		ht[sh].mac = smac;	/* XXX expire ? */
1219259412Sluigi		ht[sh].ports = mysrc;
1220259412Sluigi		if (netmap_verbose)
1221259412Sluigi		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1222259412Sluigi			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1223259412Sluigi	}
1224259412Sluigi	dst = NM_BDG_BROADCAST;
1225259412Sluigi	if ((buf[0] & 1) == 0) { /* unicast */
1226259412Sluigi		dh = nm_bridge_rthash(buf); // XXX hash of dst
1227259412Sluigi		if (ht[dh].mac == dmac) {	/* found dst */
1228259412Sluigi			dst = ht[dh].ports;
1229259412Sluigi		}
1230259412Sluigi		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1231259412Sluigi	}
1232259412Sluigi	*dst_ring = 0;
1233259412Sluigi	return dst;
1234259412Sluigi}
1235259412Sluigi
1236259412Sluigi
1237259412Sluigi/*
1238260368Sluigi * Available space in the ring. Only used in VALE code
1239260368Sluigi * and only with is_rx = 1
1240260368Sluigi */
1241260368Sluigistatic inline uint32_t
1242260368Sluiginm_kr_space(struct netmap_kring *k, int is_rx)
1243260368Sluigi{
1244260368Sluigi	int space;
1245260368Sluigi
1246260368Sluigi	if (is_rx) {
1247260368Sluigi		int busy = k->nkr_hwlease - k->nr_hwcur;
1248260368Sluigi		if (busy < 0)
1249260368Sluigi			busy += k->nkr_num_slots;
1250260368Sluigi		space = k->nkr_num_slots - 1 - busy;
1251260368Sluigi	} else {
1252260368Sluigi		/* XXX never used in this branch */
1253260368Sluigi		space = k->nr_hwtail - k->nkr_hwlease;
1254260368Sluigi		if (space < 0)
1255260368Sluigi			space += k->nkr_num_slots;
1256260368Sluigi	}
1257260368Sluigi#if 0
1258260368Sluigi	// sanity check
1259260368Sluigi	if (k->nkr_hwlease >= k->nkr_num_slots ||
1260260368Sluigi		k->nr_hwcur >= k->nkr_num_slots ||
1261260368Sluigi		k->nr_tail >= k->nkr_num_slots ||
1262260368Sluigi		busy < 0 ||
1263260368Sluigi		busy >= k->nkr_num_slots) {
1264260368Sluigi		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
1265260368Sluigi			k->nkr_lease_idx, k->nkr_num_slots);
1266260368Sluigi	}
1267260368Sluigi#endif
1268260368Sluigi	return space;
1269260368Sluigi}
1270260368Sluigi
1271260368Sluigi
1272260368Sluigi
1273260368Sluigi
1274260368Sluigi/* make a lease on the kring for N positions. return the
1275260368Sluigi * lease index
1276260368Sluigi * XXX only used in VALE code and with is_rx = 1
1277260368Sluigi */
1278260368Sluigistatic inline uint32_t
1279260368Sluiginm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
1280260368Sluigi{
1281260368Sluigi	uint32_t lim = k->nkr_num_slots - 1;
1282260368Sluigi	uint32_t lease_idx = k->nkr_lease_idx;
1283260368Sluigi
1284260368Sluigi	k->nkr_leases[lease_idx] = NR_NOSLOT;
1285260368Sluigi	k->nkr_lease_idx = nm_next(lease_idx, lim);
1286260368Sluigi
1287260368Sluigi	if (n > nm_kr_space(k, is_rx)) {
1288260368Sluigi		D("invalid request for %d slots", n);
1289260368Sluigi		panic("x");
1290260368Sluigi	}
1291260368Sluigi	/* XXX verify that there are n slots */
1292260368Sluigi	k->nkr_hwlease += n;
1293260368Sluigi	if (k->nkr_hwlease > lim)
1294260368Sluigi		k->nkr_hwlease -= lim + 1;
1295260368Sluigi
1296260368Sluigi	if (k->nkr_hwlease >= k->nkr_num_slots ||
1297260368Sluigi		k->nr_hwcur >= k->nkr_num_slots ||
1298260368Sluigi		k->nr_hwtail >= k->nkr_num_slots ||
1299260368Sluigi		k->nkr_lease_idx >= k->nkr_num_slots) {
1300260368Sluigi		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
1301270063Sluigi			k->na->name,
1302260368Sluigi			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
1303260368Sluigi			k->nkr_lease_idx, k->nkr_num_slots);
1304260368Sluigi	}
1305260368Sluigi	return lease_idx;
1306260368Sluigi}
1307260368Sluigi
1308260368Sluigi/*
1309270063Sluigi *
1310259412Sluigi * This flush routine supports only unicast and broadcast but a large
1311259412Sluigi * number of ports, and lets us replace the learn and dispatch functions.
1312259412Sluigi */
1313259412Sluigiint
1314259412Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1315259412Sluigi		u_int ring_nr)
1316259412Sluigi{
1317259412Sluigi	struct nm_bdg_q *dst_ents, *brddst;
1318259412Sluigi	uint16_t num_dsts = 0, *dsts;
1319259412Sluigi	struct nm_bridge *b = na->na_bdg;
1320259412Sluigi	u_int i, j, me = na->bdg_port;
1321259412Sluigi
1322259412Sluigi	/*
1323259412Sluigi	 * The work area (pointed by ft) is followed by an array of
1324259412Sluigi	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1325259412Sluigi	 * queues per port plus one for the broadcast traffic.
1326259412Sluigi	 * Then we have an array of destination indexes.
1327259412Sluigi	 */
1328259412Sluigi	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1329259412Sluigi	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1330259412Sluigi
1331259412Sluigi	/* first pass: find a destination for each packet in the batch */
1332259412Sluigi	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1333259412Sluigi		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1334259412Sluigi		uint16_t dst_port, d_i;
1335259412Sluigi		struct nm_bdg_q *d;
1336259412Sluigi
1337259412Sluigi		ND("slot %d frags %d", i, ft[i].ft_frags);
1338261909Sluigi		/* Drop the packet if the virtio-net header is not into the first
1339259412Sluigi		   fragment nor at the very beginning of the second. */
1340270063Sluigi		if (unlikely(na->virt_hdr_len > ft[i].ft_len))
1341259412Sluigi			continue;
1342270063Sluigi		dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
1343259412Sluigi		if (netmap_verbose > 255)
1344259412Sluigi			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1345259412Sluigi		if (dst_port == NM_BDG_NOPORT)
1346259412Sluigi			continue; /* this packet is identified to be dropped */
1347259412Sluigi		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1348259412Sluigi			continue;
1349259412Sluigi		else if (dst_port == NM_BDG_BROADCAST)
1350259412Sluigi			dst_ring = 0; /* broadcasts always go to ring 0 */
1351259412Sluigi		else if (unlikely(dst_port == me ||
1352259412Sluigi		    !b->bdg_ports[dst_port]))
1353259412Sluigi			continue;
1354259412Sluigi
1355259412Sluigi		/* get a position in the scratch pad */
1356259412Sluigi		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1357259412Sluigi		d = dst_ents + d_i;
1358259412Sluigi
1359259412Sluigi		/* append the first fragment to the list */
1360259412Sluigi		if (d->bq_head == NM_FT_NULL) { /* new destination */
1361259412Sluigi			d->bq_head = d->bq_tail = i;
1362259412Sluigi			/* remember this position to be scanned later */
1363259412Sluigi			if (dst_port != NM_BDG_BROADCAST)
1364259412Sluigi				dsts[num_dsts++] = d_i;
1365259412Sluigi		} else {
1366259412Sluigi			ft[d->bq_tail].ft_next = i;
1367259412Sluigi			d->bq_tail = i;
1368259412Sluigi		}
1369259412Sluigi		d->bq_len += ft[i].ft_frags;
1370259412Sluigi	}
1371259412Sluigi
1372259412Sluigi	/*
1373259412Sluigi	 * Broadcast traffic goes to ring 0 on all destinations.
1374259412Sluigi	 * So we need to add these rings to the list of ports to scan.
1375259412Sluigi	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1376259412Sluigi	 * expensive. We should keep a compact list of active destinations
1377259412Sluigi	 * so we could shorten this loop.
1378259412Sluigi	 */
1379259412Sluigi	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1380259412Sluigi	if (brddst->bq_head != NM_FT_NULL) {
1381259412Sluigi		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1382259412Sluigi			uint16_t d_i;
1383259412Sluigi			i = b->bdg_port_index[j];
1384259412Sluigi			if (unlikely(i == me))
1385259412Sluigi				continue;
1386259412Sluigi			d_i = i * NM_BDG_MAXRINGS;
1387259412Sluigi			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1388259412Sluigi				dsts[num_dsts++] = d_i;
1389259412Sluigi		}
1390259412Sluigi	}
1391259412Sluigi
1392259412Sluigi	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1393270063Sluigi	/* second pass: scan destinations */
1394259412Sluigi	for (i = 0; i < num_dsts; i++) {
1395259412Sluigi		struct netmap_vp_adapter *dst_na;
1396259412Sluigi		struct netmap_kring *kring;
1397259412Sluigi		struct netmap_ring *ring;
1398261909Sluigi		u_int dst_nr, lim, j, d_i, next, brd_next;
1399259412Sluigi		u_int needed, howmany;
1400259412Sluigi		int retry = netmap_txsync_retry;
1401259412Sluigi		struct nm_bdg_q *d;
1402259412Sluigi		uint32_t my_start = 0, lease_idx = 0;
1403259412Sluigi		int nrings;
1404261909Sluigi		int virt_hdr_mismatch = 0;
1405259412Sluigi
1406259412Sluigi		d_i = dsts[i];
1407259412Sluigi		ND("second pass %d port %d", i, d_i);
1408259412Sluigi		d = dst_ents + d_i;
1409259412Sluigi		// XXX fix the division
1410259412Sluigi		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1411259412Sluigi		/* protect from the lookup function returning an inactive
1412259412Sluigi		 * destination port
1413259412Sluigi		 */
1414259412Sluigi		if (unlikely(dst_na == NULL))
1415259412Sluigi			goto cleanup;
1416259412Sluigi		if (dst_na->up.na_flags & NAF_SW_ONLY)
1417259412Sluigi			goto cleanup;
1418259412Sluigi		/*
1419259412Sluigi		 * The interface may be in !netmap mode in two cases:
1420259412Sluigi		 * - when na is attached but not activated yet;
1421259412Sluigi		 * - when na is being deactivated but is still attached.
1422259412Sluigi		 */
1423270063Sluigi		if (unlikely(!nm_netmap_on(&dst_na->up))) {
1424259412Sluigi			ND("not in netmap mode!");
1425259412Sluigi			goto cleanup;
1426259412Sluigi		}
1427259412Sluigi
1428259412Sluigi		/* there is at least one either unicast or broadcast packet */
1429259412Sluigi		brd_next = brddst->bq_head;
1430259412Sluigi		next = d->bq_head;
1431259412Sluigi		/* we need to reserve this many slots. If fewer are
1432259412Sluigi		 * available, some packets will be dropped.
1433259412Sluigi		 * Packets may have multiple fragments, so there is a
1434259412Sluigi		 * chance that we may not use all of the slots
1435259412Sluigi		 * we have claimed, so we will need to handle the leftover
1436259412Sluigi		 * ones when we regain the lock.
1437259412Sluigi		 */
1438259412Sluigi		needed = d->bq_len + brddst->bq_len;
1439259412Sluigi
1440261909Sluigi		if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
1441270063Sluigi			RD(3, "virt_hdr_mismatch, src %d dst %d", na->virt_hdr_len, dst_na->virt_hdr_len);
1442261909Sluigi			/* There is a virtio-net header/offloadings mismatch between
1443261909Sluigi			 * source and destination. The slower mismatch datapath will
1444261909Sluigi			 * be used to cope with all the mismatches.
1445261909Sluigi			 */
1446261909Sluigi			virt_hdr_mismatch = 1;
1447261909Sluigi			if (dst_na->mfs < na->mfs) {
1448261909Sluigi				/* We may need to do segmentation offloadings, and so
1449261909Sluigi				 * we may need a number of destination slots greater
1450261909Sluigi				 * than the number of input slots ('needed').
1451261909Sluigi				 * We look for the smallest integer 'x' which satisfies:
1452261909Sluigi				 *	needed * na->mfs + x * H <= x * na->mfs
1453261909Sluigi				 * where 'H' is the length of the longest header that may
1454261909Sluigi				 * be replicated in the segmentation process (e.g. for
1455261909Sluigi				 * TCPv4 we must account for ethernet header, IP header
1456261909Sluigi				 * and TCPv4 header).
1457261909Sluigi				 */
1458261909Sluigi				needed = (needed * na->mfs) /
1459261909Sluigi						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
1460261909Sluigi				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
1461261909Sluigi			}
1462261909Sluigi		}
1463261909Sluigi
1464259412Sluigi		ND(5, "pass 2 dst %d is %x %s",
1465259412Sluigi			i, d_i, is_vp ? "virtual" : "nic/host");
1466259412Sluigi		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1467259412Sluigi		nrings = dst_na->up.num_rx_rings;
1468259412Sluigi		if (dst_nr >= nrings)
1469259412Sluigi			dst_nr = dst_nr % nrings;
1470259412Sluigi		kring = &dst_na->up.rx_rings[dst_nr];
1471259412Sluigi		ring = kring->ring;
1472259412Sluigi		lim = kring->nkr_num_slots - 1;
1473259412Sluigi
1474259412Sluigiretry:
1475259412Sluigi
1476261909Sluigi		if (dst_na->retry && retry) {
1477261909Sluigi			/* try to get some free slot from the previous run */
1478261909Sluigi			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1479270063Sluigi			/* actually useful only for bwraps, since there
1480270063Sluigi			 * the notify will trigger a txsync on the hwna. VALE ports
1481270063Sluigi			 * have dst_na->retry == 0
1482270063Sluigi			 */
1483261909Sluigi		}
1484259412Sluigi		/* reserve the buffers in the queue and an entry
1485259412Sluigi		 * to report completion, and drop lock.
1486259412Sluigi		 * XXX this might become a helper function.
1487259412Sluigi		 */
1488259412Sluigi		mtx_lock(&kring->q_lock);
1489259412Sluigi		if (kring->nkr_stopped) {
1490259412Sluigi			mtx_unlock(&kring->q_lock);
1491259412Sluigi			goto cleanup;
1492259412Sluigi		}
1493259412Sluigi		my_start = j = kring->nkr_hwlease;
1494259412Sluigi		howmany = nm_kr_space(kring, 1);
1495259412Sluigi		if (needed < howmany)
1496259412Sluigi			howmany = needed;
1497259412Sluigi		lease_idx = nm_kr_lease(kring, howmany, 1);
1498259412Sluigi		mtx_unlock(&kring->q_lock);
1499259412Sluigi
1500259412Sluigi		/* only retry if we need more than available slots */
1501259412Sluigi		if (retry && needed <= howmany)
1502259412Sluigi			retry = 0;
1503259412Sluigi
1504259412Sluigi		/* copy to the destination queue */
1505259412Sluigi		while (howmany > 0) {
1506259412Sluigi			struct netmap_slot *slot;
1507259412Sluigi			struct nm_bdg_fwd *ft_p, *ft_end;
1508259412Sluigi			u_int cnt;
1509259412Sluigi
1510259412Sluigi			/* find the queue from which we pick next packet.
1511259412Sluigi			 * NM_FT_NULL is always higher than valid indexes
1512259412Sluigi			 * so we never dereference it if the other list
1513259412Sluigi			 * has packets (and if both are empty we never
1514259412Sluigi			 * get here).
1515259412Sluigi			 */
1516259412Sluigi			if (next < brd_next) {
1517259412Sluigi				ft_p = ft + next;
1518259412Sluigi				next = ft_p->ft_next;
1519259412Sluigi			} else { /* insert broadcast */
1520259412Sluigi				ft_p = ft + brd_next;
1521259412Sluigi				brd_next = ft_p->ft_next;
1522259412Sluigi			}
1523259412Sluigi			cnt = ft_p->ft_frags; // cnt > 0
1524259412Sluigi			if (unlikely(cnt > howmany))
1525259412Sluigi			    break; /* no more space */
1526259412Sluigi			if (netmap_verbose && cnt > 1)
1527259412Sluigi				RD(5, "rx %d frags to %d", cnt, j);
1528259412Sluigi			ft_end = ft_p + cnt;
1529261909Sluigi			if (unlikely(virt_hdr_mismatch)) {
1530261909Sluigi				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
1531261909Sluigi			} else {
1532261909Sluigi				howmany -= cnt;
1533261909Sluigi				do {
1534261909Sluigi					char *dst, *src = ft_p->ft_buf;
1535261909Sluigi					size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1536259412Sluigi
1537261909Sluigi					slot = &ring->slot[j];
1538270063Sluigi					dst = NMB(&dst_na->up, slot);
1539259412Sluigi
1540261909Sluigi					ND("send [%d] %d(%d) bytes at %s:%d",
1541261909Sluigi							i, (int)copy_len, (int)dst_len,
1542261909Sluigi							NM_IFPNAME(dst_ifp), j);
1543261909Sluigi					/* round to a multiple of 64 */
1544261909Sluigi					copy_len = (copy_len + 63) & ~63;
1545260368Sluigi
1546270063Sluigi					if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
1547270063Sluigi						     copy_len > NETMAP_BUF_SIZE(&na->up))) {
1548267151Sluigi						RD(5, "invalid len %d, down to 64", (int)copy_len);
1549267151Sluigi						copy_len = dst_len = 64; // XXX
1550267151Sluigi					}
1551261909Sluigi					if (ft_p->ft_flags & NS_INDIRECT) {
1552261909Sluigi						if (copyin(src, dst, copy_len)) {
1553261909Sluigi							// invalid user pointer, pretend len is 0
1554261909Sluigi							dst_len = 0;
1555261909Sluigi						}
1556261909Sluigi					} else {
1557261909Sluigi						//memcpy(dst, src, copy_len);
1558261909Sluigi						pkt_copy(src, dst, (int)copy_len);
1559261909Sluigi					}
1560261909Sluigi					slot->len = dst_len;
1561261909Sluigi					slot->flags = (cnt << 8)| NS_MOREFRAG;
1562261909Sluigi					j = nm_next(j, lim);
1563261909Sluigi					needed--;
1564261909Sluigi					ft_p++;
1565261909Sluigi				} while (ft_p != ft_end);
1566261909Sluigi				slot->flags = (cnt << 8); /* clear flag on last entry */
1567261909Sluigi			}
1568259412Sluigi			/* are we done ? */
1569259412Sluigi			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1570259412Sluigi				break;
1571259412Sluigi		}
1572259412Sluigi		{
1573259412Sluigi		    /* current position */
1574259412Sluigi		    uint32_t *p = kring->nkr_leases; /* shorthand */
1575259412Sluigi		    uint32_t update_pos;
1576259412Sluigi		    int still_locked = 1;
1577259412Sluigi
1578259412Sluigi		    mtx_lock(&kring->q_lock);
1579259412Sluigi		    if (unlikely(howmany > 0)) {
1580259412Sluigi			/* not used all bufs. If i am the last one
1581259412Sluigi			 * i can recover the slots, otherwise must
1582259412Sluigi			 * fill them with 0 to mark empty packets.
1583259412Sluigi			 */
1584259412Sluigi			ND("leftover %d bufs", howmany);
1585259412Sluigi			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1586259412Sluigi			    /* yes i am the last one */
1587259412Sluigi			    ND("roll back nkr_hwlease to %d", j);
1588259412Sluigi			    kring->nkr_hwlease = j;
1589259412Sluigi			} else {
1590259412Sluigi			    while (howmany-- > 0) {
1591259412Sluigi				ring->slot[j].len = 0;
1592259412Sluigi				ring->slot[j].flags = 0;
1593259412Sluigi				j = nm_next(j, lim);
1594259412Sluigi			    }
1595259412Sluigi			}
1596259412Sluigi		    }
1597259412Sluigi		    p[lease_idx] = j; /* report I am done */
1598259412Sluigi
1599260368Sluigi		    update_pos = kring->nr_hwtail;
1600259412Sluigi
1601259412Sluigi		    if (my_start == update_pos) {
1602259412Sluigi			/* all slots before my_start have been reported,
1603259412Sluigi			 * so scan subsequent leases to see if other ranges
1604259412Sluigi			 * have been completed, and to a selwakeup or txsync.
1605259412Sluigi		         */
1606259412Sluigi			while (lease_idx != kring->nkr_lease_idx &&
1607259412Sluigi				p[lease_idx] != NR_NOSLOT) {
1608259412Sluigi			    j = p[lease_idx];
1609259412Sluigi			    p[lease_idx] = NR_NOSLOT;
1610259412Sluigi			    lease_idx = nm_next(lease_idx, lim);
1611259412Sluigi			}
1612259412Sluigi			/* j is the new 'write' position. j != my_start
1613259412Sluigi			 * means there are new buffers to report
1614259412Sluigi			 */
1615259412Sluigi			if (likely(j != my_start)) {
1616260368Sluigi				kring->nr_hwtail = j;
1617259412Sluigi				still_locked = 0;
1618259412Sluigi				mtx_unlock(&kring->q_lock);
1619261909Sluigi				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1620270063Sluigi				/* this is netmap_notify for VALE ports and
1621270063Sluigi				 * netmap_bwrap_notify for bwrap. The latter will
1622270063Sluigi				 * trigger a txsync on the underlying hwna
1623270063Sluigi				 */
1624270063Sluigi				if (dst_na->retry && retry--) {
1625270063Sluigi					/* XXX this is going to call nm_notify again.
1626270063Sluigi					 * Only useful for bwrap in virtual machines
1627270063Sluigi					 */
1628259412Sluigi					goto retry;
1629270063Sluigi				}
1630259412Sluigi			}
1631259412Sluigi		    }
1632259412Sluigi		    if (still_locked)
1633259412Sluigi			mtx_unlock(&kring->q_lock);
1634259412Sluigi		}
1635259412Sluigicleanup:
1636259412Sluigi		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1637259412Sluigi		d->bq_len = 0;
1638259412Sluigi	}
1639259412Sluigi	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1640259412Sluigi	brddst->bq_len = 0;
1641259412Sluigi	return 0;
1642259412Sluigi}
1643259412Sluigi
1644270063Sluigi/* nm_txsync callback for VALE ports */
1645259412Sluigistatic int
1646270063Sluiginetmap_vp_txsync(struct netmap_kring *kring, int flags)
1647259412Sluigi{
1648270063Sluigi	struct netmap_vp_adapter *na =
1649270063Sluigi		(struct netmap_vp_adapter *)kring->na;
1650260368Sluigi	u_int done;
1651260368Sluigi	u_int const lim = kring->nkr_num_slots - 1;
1652260368Sluigi	u_int const cur = kring->rcur;
1653259412Sluigi
1654259412Sluigi	if (bridge_batch <= 0) { /* testing only */
1655260368Sluigi		done = cur; // used all
1656259412Sluigi		goto done;
1657259412Sluigi	}
1658270063Sluigi	if (!na->na_bdg) {
1659270063Sluigi		done = cur;
1660270063Sluigi		goto done;
1661270063Sluigi	}
1662259412Sluigi	if (bridge_batch > NM_BDG_BATCH)
1663259412Sluigi		bridge_batch = NM_BDG_BATCH;
1664259412Sluigi
1665270063Sluigi	done = nm_bdg_preflush(kring, cur);
1666259412Sluigidone:
1667260368Sluigi	if (done != cur)
1668260368Sluigi		D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail);
1669260368Sluigi	/*
1670260368Sluigi	 * packets between 'done' and 'cur' are left unsent.
1671260368Sluigi	 */
1672260368Sluigi	kring->nr_hwcur = done;
1673260368Sluigi	kring->nr_hwtail = nm_prev(done, lim);
1674260368Sluigi	nm_txsync_finalize(kring);
1675259412Sluigi	if (netmap_verbose)
1676270063Sluigi		D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
1677259412Sluigi	return 0;
1678259412Sluigi}
1679259412Sluigi
1680259412Sluigi
1681270063Sluigi/* rxsync code used by VALE ports nm_rxsync callback and also
1682270063Sluigi * internally by the brwap
1683259412Sluigi */
1684259412Sluigistatic int
1685270063Sluiginetmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
1686259412Sluigi{
1687270063Sluigi	struct netmap_adapter *na = kring->na;
1688259412Sluigi	struct netmap_ring *ring = kring->ring;
1689260368Sluigi	u_int nm_i, lim = kring->nkr_num_slots - 1;
1690260368Sluigi	u_int head = nm_rxsync_prologue(kring);
1691259412Sluigi	int n;
1692259412Sluigi
1693260368Sluigi	if (head > lim) {
1694259412Sluigi		D("ouch dangerous reset!!!");
1695259412Sluigi		n = netmap_ring_reinit(kring);
1696259412Sluigi		goto done;
1697259412Sluigi	}
1698259412Sluigi
1699260368Sluigi	/* First part, import newly received packets. */
1700260368Sluigi	/* actually nothing to do here, they are already in the kring */
1701259412Sluigi
1702260368Sluigi	/* Second part, skip past packets that userspace has released. */
1703260368Sluigi	nm_i = kring->nr_hwcur;
1704260368Sluigi	if (nm_i != head) {
1705260368Sluigi		/* consistency check, but nothing really important here */
1706260368Sluigi		for (n = 0; likely(nm_i != head); n++) {
1707260368Sluigi			struct netmap_slot *slot = &ring->slot[nm_i];
1708270063Sluigi			void *addr = NMB(na, slot);
1709259412Sluigi
1710270063Sluigi			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
1711259412Sluigi				D("bad buffer index %d, ignore ?",
1712259412Sluigi					slot->buf_idx);
1713259412Sluigi			}
1714259412Sluigi			slot->flags &= ~NS_BUF_CHANGED;
1715260368Sluigi			nm_i = nm_next(nm_i, lim);
1716259412Sluigi		}
1717260368Sluigi		kring->nr_hwcur = head;
1718259412Sluigi	}
1719260368Sluigi
1720259412Sluigi	/* tell userspace that there are new packets */
1721260368Sluigi	nm_rxsync_finalize(kring);
1722259412Sluigi	n = 0;
1723259412Sluigidone:
1724260368Sluigi	return n;
1725260368Sluigi}
1726260368Sluigi
1727260368Sluigi/*
1728270063Sluigi * nm_rxsync callback for VALE ports
1729260368Sluigi * user process reading from a VALE switch.
1730260368Sluigi * Already protected against concurrent calls from userspace,
1731260368Sluigi * but we must acquire the queue's lock to protect against
1732260368Sluigi * writers on the same queue.
1733260368Sluigi */
1734260368Sluigistatic int
1735270063Sluiginetmap_vp_rxsync(struct netmap_kring *kring, int flags)
1736260368Sluigi{
1737260368Sluigi	int n;
1738260368Sluigi
1739260368Sluigi	mtx_lock(&kring->q_lock);
1740270063Sluigi	n = netmap_vp_rxsync_locked(kring, flags);
1741259412Sluigi	mtx_unlock(&kring->q_lock);
1742259412Sluigi	return n;
1743259412Sluigi}
1744259412Sluigi
1745260368Sluigi
1746270063Sluigi/* nm_bdg_attach callback for VALE ports
1747270063Sluigi * The na_vp port is this same netmap_adapter. There is no host port.
1748270063Sluigi */
1749259412Sluigistatic int
1750270063Sluiginetmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
1751259412Sluigi{
1752270063Sluigi	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
1753270063Sluigi
1754270063Sluigi	if (vpna->na_bdg)
1755270063Sluigi		return EBUSY;
1756270063Sluigi	na->na_vp = vpna;
1757270063Sluigi	strncpy(na->name, name, sizeof(na->name));
1758270063Sluigi	na->na_hostvp = NULL;
1759270063Sluigi	return 0;
1760270063Sluigi}
1761270063Sluigi
1762270063Sluigi/* create a netmap_vp_adapter that describes a VALE port.
1763270063Sluigi * Only persistent VALE ports have a non-null ifp.
1764270063Sluigi */
1765270063Sluigistatic int
1766270063Sluiginetmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter **ret)
1767270063Sluigi{
1768259412Sluigi	struct netmap_vp_adapter *vpna;
1769259412Sluigi	struct netmap_adapter *na;
1770259412Sluigi	int error;
1771261909Sluigi	u_int npipes = 0;
1772259412Sluigi
1773259412Sluigi	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1774259412Sluigi	if (vpna == NULL)
1775259412Sluigi		return ENOMEM;
1776259412Sluigi
1777259412Sluigi 	na = &vpna->up;
1778259412Sluigi
1779259412Sluigi	na->ifp = ifp;
1780270063Sluigi	strncpy(na->name, nmr->nr_name, sizeof(na->name));
1781259412Sluigi
1782259412Sluigi	/* bound checking */
1783259412Sluigi	na->num_tx_rings = nmr->nr_tx_rings;
1784259412Sluigi	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1785259412Sluigi	nmr->nr_tx_rings = na->num_tx_rings; // write back
1786259412Sluigi	na->num_rx_rings = nmr->nr_rx_rings;
1787259412Sluigi	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1788259412Sluigi	nmr->nr_rx_rings = na->num_rx_rings; // write back
1789259412Sluigi	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1790259412Sluigi			1, NM_BDG_MAXSLOTS, NULL);
1791259412Sluigi	na->num_tx_desc = nmr->nr_tx_slots;
1792259412Sluigi	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1793259412Sluigi			1, NM_BDG_MAXSLOTS, NULL);
1794261909Sluigi	/* validate number of pipes. We want at least 1,
1795261909Sluigi	 * but probably can do with some more.
1796261909Sluigi	 * So let's use 2 as default (when 0 is supplied)
1797261909Sluigi	 */
1798261909Sluigi	npipes = nmr->nr_arg1;
1799261909Sluigi	nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
1800261909Sluigi	nmr->nr_arg1 = npipes;	/* write back */
1801261909Sluigi	/* validate extra bufs */
1802261909Sluigi	nm_bound_var(&nmr->nr_arg3, 0, 0,
1803261909Sluigi			128*NM_BDG_MAXSLOTS, NULL);
1804259412Sluigi	na->num_rx_desc = nmr->nr_rx_slots;
1805261909Sluigi	vpna->virt_hdr_len = 0;
1806261909Sluigi	vpna->mfs = 1514;
1807261909Sluigi	/*if (vpna->mfs > netmap_buf_size)  TODO netmap_buf_size is zero??
1808261909Sluigi		vpna->mfs = netmap_buf_size; */
1809261909Sluigi        if (netmap_verbose)
1810261909Sluigi		D("max frame size %u", vpna->mfs);
1811259412Sluigi
1812259412Sluigi	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
1813270063Sluigi	na->nm_txsync = netmap_vp_txsync;
1814270063Sluigi	na->nm_rxsync = netmap_vp_rxsync;
1815270063Sluigi	na->nm_register = netmap_vp_reg;
1816259412Sluigi	na->nm_krings_create = netmap_vp_krings_create;
1817259412Sluigi	na->nm_krings_delete = netmap_vp_krings_delete;
1818270063Sluigi	na->nm_dtor = netmap_vp_dtor;
1819270063Sluigi	na->nm_mem = netmap_mem_private_new(na->name,
1820259412Sluigi			na->num_tx_rings, na->num_tx_desc,
1821261909Sluigi			na->num_rx_rings, na->num_rx_desc,
1822261909Sluigi			nmr->nr_arg3, npipes, &error);
1823261909Sluigi	if (na->nm_mem == NULL)
1824261909Sluigi		goto err;
1825270063Sluigi	na->nm_bdg_attach = netmap_vp_bdg_attach;
1826259412Sluigi	/* other nmd fields are set in the common routine */
1827259412Sluigi	error = netmap_attach_common(na);
1828261909Sluigi	if (error)
1829261909Sluigi		goto err;
1830270063Sluigi	*ret = vpna;
1831259412Sluigi	return 0;
1832261909Sluigi
1833261909Sluigierr:
1834261909Sluigi	if (na->nm_mem != NULL)
1835261909Sluigi		netmap_mem_private_delete(na->nm_mem);
1836261909Sluigi	free(vpna, M_DEVBUF);
1837261909Sluigi	return error;
1838259412Sluigi}
1839259412Sluigi
1840270063Sluigi/* Bridge wrapper code (bwrap).
1841270063Sluigi * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
1842270063Sluigi * VALE switch.
1843270063Sluigi * The main task is to swap the meaning of tx and rx rings to match the
1844270063Sluigi * expectations of the VALE switch code (see nm_bdg_flush).
1845270063Sluigi *
1846270063Sluigi * The bwrap works by interposing a netmap_bwrap_adapter between the
1847270063Sluigi * rest of the system and the hwna. The netmap_bwrap_adapter looks like
 * a netmap_vp_adapter to the rest of the system, but, internally, it
1849270063Sluigi * translates all callbacks to what the hwna expects.
1850270063Sluigi *
1851270063Sluigi * Note that we have to intercept callbacks coming from two sides:
1852270063Sluigi *
1853270063Sluigi *  - callbacks coming from the netmap module are intercepted by
1854270063Sluigi *    passing around the netmap_bwrap_adapter instead of the hwna
1855270063Sluigi *
1856270063Sluigi *  - callbacks coming from outside of the netmap module only know
1857270063Sluigi *    about the hwna. This, however, only happens in interrupt
1858270063Sluigi *    handlers, where only the hwna->nm_notify callback is called.
1859270063Sluigi *    What the bwrap does is to overwrite the hwna->nm_notify callback
1860270063Sluigi *    with its own netmap_bwrap_intr_notify.
1861270063Sluigi *    XXX This assumes that the hwna->nm_notify callback was the
1862270063Sluigi *    standard netmap_notify(), as it is the case for nic adapters.
1863270063Sluigi *    Any additional action performed by hwna->nm_notify will not be
1864270063Sluigi *    performed by netmap_bwrap_intr_notify.
1865270063Sluigi *
1866270063Sluigi * Additionally, the bwrap can optionally attach the host rings pair
1867270063Sluigi * of the wrapped adapter to a different port of the switch.
1868270063Sluigi */
1869260368Sluigi
1870270063Sluigi
1871259412Sluigistatic void
1872259412Sluiginetmap_bwrap_dtor(struct netmap_adapter *na)
1873259412Sluigi{
1874259412Sluigi	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1875259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
1876259412Sluigi
1877259412Sluigi	ND("na %p", na);
1878270063Sluigi	/* drop reference to hwna->ifp.
1879270063Sluigi	 * If we don't do this, netmap_detach_common(na)
1880270063Sluigi	 * will think it has set NA(na->ifp) to NULL
1881270063Sluigi	 */
1882270063Sluigi	na->ifp = NULL;
1883270063Sluigi	/* for safety, also drop the possible reference
1884270063Sluigi	 * in the hostna
1885270063Sluigi	 */
1886270063Sluigi	bna->host.up.ifp = NULL;
1887259412Sluigi
1888270063Sluigi	hwna->nm_mem = bna->save_nmd;
1889259412Sluigi	hwna->na_private = NULL;
1890270063Sluigi	hwna->na_vp = hwna->na_hostvp = NULL;
1891270063Sluigi	hwna->na_flags &= ~NAF_BUSY;
1892259412Sluigi	netmap_adapter_put(hwna);
1893259412Sluigi
1894259412Sluigi}
1895259412Sluigi
1896260368Sluigi
1897259412Sluigi/*
1898260368Sluigi * Intr callback for NICs connected to a bridge.
1899260368Sluigi * Simply ignore tx interrupts (maybe we could try to recover space ?)
1900260368Sluigi * and pass received packets from nic to the bridge.
1901260368Sluigi *
1902259412Sluigi * XXX TODO check locking: this is called from the interrupt
1903259412Sluigi * handler so we should make sure that the interface is not
1904259412Sluigi * disconnected while passing down an interrupt.
1905259412Sluigi *
1906260368Sluigi * Note, no user process can access this NIC or the host stack.
1907260368Sluigi * The only part of the ring that is significant are the slots,
1908260368Sluigi * and head/cur/tail are set from the kring as needed
1909260368Sluigi * (part as a receive ring, part as a transmit ring).
1910260368Sluigi *
1911260368Sluigi * callback that overwrites the hwna notify callback.
1912259412Sluigi * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1913259412Sluigi * The bridge wrapper then sends the packets through the bridge.
1914259412Sluigi */
1915259412Sluigistatic int
1916259412Sluiginetmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
1917259412Sluigi{
1918259412Sluigi	struct netmap_bwrap_adapter *bna = na->na_private;
1919259412Sluigi	struct netmap_vp_adapter *hostna = &bna->host;
1920259412Sluigi	struct netmap_kring *kring, *bkring;
1921259412Sluigi	struct netmap_ring *ring;
1922259412Sluigi	int is_host_ring = ring_nr == na->num_rx_rings;
1923259412Sluigi	struct netmap_vp_adapter *vpna = &bna->up;
1924259412Sluigi	int error = 0;
1925259412Sluigi
1926260368Sluigi	if (netmap_verbose)
1927270063Sluigi	    D("%s %s%d 0x%x", na->name,
1928260368Sluigi		(tx == NR_TX ? "TX" : "RX"), ring_nr, flags);
1929259412Sluigi
1930259412Sluigi	if (flags & NAF_DISABLE_NOTIFY) {
1931270063Sluigi		/* the enabled/disabled state of the ring has changed,
1932270063Sluigi		 * propagate the info to the wrapper (with tx/rx swapped)
1933270063Sluigi		 */
1934270063Sluigi		if (tx == NR_TX) {
1935270063Sluigi			netmap_set_rxring(&vpna->up, ring_nr,
1936270063Sluigi					na->tx_rings[ring_nr].nkr_stopped);
1937270063Sluigi		} else {
1938270063Sluigi			netmap_set_txring(&vpna->up, ring_nr,
1939270063Sluigi					na->rx_rings[ring_nr].nkr_stopped);
1940270063Sluigi		}
1941259412Sluigi		return 0;
1942259412Sluigi	}
1943259412Sluigi
1944270063Sluigi	if (!nm_netmap_on(na))
1945259412Sluigi		return 0;
1946259412Sluigi
1947260368Sluigi	/* we only care about receive interrupts */
1948259412Sluigi	if (tx == NR_TX)
1949259412Sluigi		return 0;
1950259412Sluigi
1951259412Sluigi	kring = &na->rx_rings[ring_nr];
1952259412Sluigi	ring = kring->ring;
1953259412Sluigi
1954259412Sluigi	/* make sure the ring is not disabled */
1955259412Sluigi	if (nm_kr_tryget(kring))
1956259412Sluigi		return 0;
1957259412Sluigi
1958259412Sluigi	if (is_host_ring && hostna->na_bdg == NULL) {
1959259412Sluigi		error = bna->save_notify(na, ring_nr, tx, flags);
1960259412Sluigi		goto put_out;
1961259412Sluigi	}
1962259412Sluigi
1963260368Sluigi	/* Here we expect ring->head = ring->cur = ring->tail
1964260368Sluigi	 * because everything has been released from the previous round.
1965260368Sluigi	 * However the ring is shared and we might have info from
1966260368Sluigi	 * the wrong side (the tx ring). Hence we overwrite with
1967260368Sluigi	 * the info from the rx kring.
1968260368Sluigi	 */
1969260368Sluigi	if (netmap_verbose)
1970270063Sluigi	    D("%s head %d cur %d tail %d (kring %d %d %d)",  na->name,
1971260368Sluigi		ring->head, ring->cur, ring->tail,
1972260368Sluigi		kring->rhead, kring->rcur, kring->rtail);
1973260368Sluigi
1974260368Sluigi	ring->head = kring->rhead;
1975260368Sluigi	ring->cur = kring->rcur;
1976260368Sluigi	ring->tail = kring->rtail;
1977260368Sluigi
1978259412Sluigi	if (is_host_ring) {
1979259412Sluigi		vpna = hostna;
1980259412Sluigi		ring_nr = 0;
1981267128Sluigi	}
1982261909Sluigi	/* simulate a user wakeup on the rx ring */
1983261909Sluigi	/* fetch packets that have arrived.
1984261909Sluigi	 * XXX maybe do this in a loop ?
1985261909Sluigi	 */
1986261909Sluigi	error = kring->nm_sync(kring, 0);
1987261909Sluigi	if (error)
1988261909Sluigi		goto put_out;
1989260368Sluigi	if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
1990259412Sluigi		D("how strange, interrupt with no packets on %s",
1991270063Sluigi			na->name);
1992259412Sluigi		goto put_out;
1993259412Sluigi	}
1994260368Sluigi
1995260368Sluigi	/* new packets are ring->cur to ring->tail, and the bkring
1996260368Sluigi	 * had hwcur == ring->cur. So advance ring->cur to ring->tail
1997260368Sluigi	 * to push all packets out.
1998260368Sluigi	 */
1999260368Sluigi	ring->head = ring->cur = ring->tail;
2000260368Sluigi
2001260368Sluigi	/* also set tail to what the bwrap expects */
2002260368Sluigi	bkring = &vpna->up.tx_rings[ring_nr];
2003260368Sluigi	ring->tail = bkring->nr_hwtail; // rtail too ?
2004260368Sluigi
2005260368Sluigi	/* pass packets to the switch */
2006260368Sluigi	nm_txsync_prologue(bkring); // XXX error checking ?
2007270063Sluigi	netmap_vp_txsync(bkring, flags);
2008259412Sluigi
2009260368Sluigi	/* mark all buffers as released on this ring */
2010260368Sluigi	ring->head = ring->cur = kring->nr_hwtail;
2011260368Sluigi	ring->tail = kring->rtail;
2012260368Sluigi	/* another call to actually release the buffers */
2013260368Sluigi	if (!is_host_ring) {
2014261909Sluigi		error = kring->nm_sync(kring, 0);
2015260368Sluigi	} else {
2016260368Sluigi		/* mark all packets as released, as in the
2017260368Sluigi		 * second part of netmap_rxsync_from_host()
2018260368Sluigi		 */
2019260368Sluigi		kring->nr_hwcur = kring->nr_hwtail;
2020260368Sluigi		nm_rxsync_finalize(kring);
2021260368Sluigi	}
2022259412Sluigi
2023259412Sluigiput_out:
2024259412Sluigi	nm_kr_put(kring);
2025259412Sluigi	return error;
2026259412Sluigi}
2027259412Sluigi
2028260368Sluigi
2029270063Sluigi/* nm_register callback for bwrap */
2030259412Sluigistatic int
2031259412Sluiginetmap_bwrap_register(struct netmap_adapter *na, int onoff)
2032259412Sluigi{
2033259412Sluigi	struct netmap_bwrap_adapter *bna =
2034259412Sluigi		(struct netmap_bwrap_adapter *)na;
2035259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
2036259412Sluigi	struct netmap_vp_adapter *hostna = &bna->host;
2037259412Sluigi	int error;
2038259412Sluigi
2039270063Sluigi	ND("%s %s", na->name, onoff ? "on" : "off");
2040259412Sluigi
2041259412Sluigi	if (onoff) {
2042259412Sluigi		int i;
2043259412Sluigi
2044270063Sluigi		/* netmap_do_regif has been called on the bwrap na.
2045270063Sluigi		 * We need to pass the information about the
2046270063Sluigi		 * memory allocator down to the hwna before
2047270063Sluigi		 * putting it in netmap mode
2048270063Sluigi		 */
2049259412Sluigi		hwna->na_lut = na->na_lut;
2050259412Sluigi		hwna->na_lut_objtotal = na->na_lut_objtotal;
2051270063Sluigi		hwna->na_lut_objsize = na->na_lut_objsize;
2052259412Sluigi
2053259412Sluigi		if (hostna->na_bdg) {
2054270063Sluigi			/* if the host rings have been attached to switch,
2055270063Sluigi			 * we need to copy the memory allocator information
2056270063Sluigi			 * in the hostna also
2057270063Sluigi			 */
2058259412Sluigi			hostna->up.na_lut = na->na_lut;
2059259412Sluigi			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
2060270063Sluigi			hostna->up.na_lut_objsize = na->na_lut_objsize;
2061259412Sluigi		}
2062259412Sluigi
2063260516Sluigi		/* cross-link the netmap rings
2064260516Sluigi		 * The original number of rings comes from hwna,
2065260516Sluigi		 * rx rings on one side equals tx rings on the other.
2066270063Sluigi		 * We need to do this now, after the initialization
2067270063Sluigi		 * of the kring->ring pointers
2068260516Sluigi		 */
2069261909Sluigi		for (i = 0; i < na->num_rx_rings + 1; i++) {
2070259412Sluigi			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
2071259412Sluigi			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
2072259412Sluigi		}
2073261909Sluigi		for (i = 0; i < na->num_tx_rings + 1; i++) {
2074259412Sluigi			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
2075259412Sluigi			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
2076259412Sluigi		}
2077259412Sluigi	}
2078259412Sluigi
2079270063Sluigi	/* forward the request to the hwna */
2080270063Sluigi	error = hwna->nm_register(hwna, onoff);
2081270063Sluigi	if (error)
2082270063Sluigi		return error;
2083259412Sluigi
2084270063Sluigi	/* impersonate a netmap_vp_adapter */
2085270063Sluigi	netmap_vp_reg(na, onoff);
2086270063Sluigi	if (hostna->na_bdg)
2087270063Sluigi		netmap_vp_reg(&hostna->up, onoff);
2088259412Sluigi
2089259412Sluigi	if (onoff) {
2090270063Sluigi		/* intercept the hwna nm_nofify callback */
2091259412Sluigi		bna->save_notify = hwna->nm_notify;
2092259412Sluigi		hwna->nm_notify = netmap_bwrap_intr_notify;
2093259412Sluigi	} else {
2094259412Sluigi		hwna->nm_notify = bna->save_notify;
2095259412Sluigi		hwna->na_lut = NULL;
2096259412Sluigi		hwna->na_lut_objtotal = 0;
2097270063Sluigi		hwna->na_lut_objsize = 0;
2098259412Sluigi	}
2099259412Sluigi
2100259412Sluigi	return 0;
2101259412Sluigi}
2102259412Sluigi
2103270063Sluigi/* nm_config callback for bwrap */
2104259412Sluigistatic int
2105259412Sluiginetmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
2106259412Sluigi				    u_int *rxr, u_int *rxd)
2107259412Sluigi{
2108259412Sluigi	struct netmap_bwrap_adapter *bna =
2109259412Sluigi		(struct netmap_bwrap_adapter *)na;
2110259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
2111259412Sluigi
2112259412Sluigi	/* forward the request */
2113259412Sluigi	netmap_update_config(hwna);
2114259412Sluigi	/* swap the results */
2115259412Sluigi	*txr = hwna->num_rx_rings;
2116259412Sluigi	*txd = hwna->num_rx_desc;
2117259412Sluigi	*rxr = hwna->num_tx_rings;
2118259412Sluigi	*rxd = hwna->num_rx_desc;
2119259412Sluigi
2120259412Sluigi	return 0;
2121259412Sluigi}
2122259412Sluigi
2123260368Sluigi
2124270063Sluigi/* nm_krings_create callback for bwrap */
2125259412Sluigistatic int
2126259412Sluiginetmap_bwrap_krings_create(struct netmap_adapter *na)
2127259412Sluigi{
2128259412Sluigi	struct netmap_bwrap_adapter *bna =
2129259412Sluigi		(struct netmap_bwrap_adapter *)na;
2130259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
2131259412Sluigi	struct netmap_adapter *hostna = &bna->host.up;
2132259412Sluigi	int error;
2133259412Sluigi
2134270063Sluigi	ND("%s", na->name);
2135259412Sluigi
2136270063Sluigi	/* impersonate a netmap_vp_adapter */
2137259412Sluigi	error = netmap_vp_krings_create(na);
2138259412Sluigi	if (error)
2139259412Sluigi		return error;
2140259412Sluigi
2141270063Sluigi	/* also create the hwna krings */
2142259412Sluigi	error = hwna->nm_krings_create(hwna);
2143259412Sluigi	if (error) {
2144259412Sluigi		netmap_vp_krings_delete(na);
2145259412Sluigi		return error;
2146259412Sluigi	}
2147270063Sluigi	/* the connection between the bwrap krings and the hwna krings
2148270063Sluigi	 * will be perfomed later, in the nm_register callback, since
2149270063Sluigi	 * now the kring->ring pointers have not been initialized yet
2150270063Sluigi	 */
2151259412Sluigi
2152261909Sluigi	if (na->na_flags & NAF_HOST_RINGS) {
2153270063Sluigi		/* the hostna rings are the host rings of the bwrap.
2154270063Sluigi		 * The corresponding krings must point back to the
2155270063Sluigi		 * hostna
2156270063Sluigi		 */
2157261909Sluigi		hostna->tx_rings = na->tx_rings + na->num_tx_rings;
2158270063Sluigi		hostna->tx_rings[0].na = hostna;
2159261909Sluigi		hostna->rx_rings = na->rx_rings + na->num_rx_rings;
2160270063Sluigi		hostna->rx_rings[0].na = hostna;
2161261909Sluigi	}
2162259412Sluigi
2163259412Sluigi	return 0;
2164259412Sluigi}
2165259412Sluigi
2166260368Sluigi
2167259412Sluigistatic void
2168259412Sluiginetmap_bwrap_krings_delete(struct netmap_adapter *na)
2169259412Sluigi{
2170259412Sluigi	struct netmap_bwrap_adapter *bna =
2171259412Sluigi		(struct netmap_bwrap_adapter *)na;
2172259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
2173259412Sluigi
2174270063Sluigi	ND("%s", na->name);
2175259412Sluigi
2176259412Sluigi	hwna->nm_krings_delete(hwna);
2177259412Sluigi	netmap_vp_krings_delete(na);
2178259412Sluigi}
2179259412Sluigi
2180260368Sluigi
2181259412Sluigi/* notify method for the bridge-->hwna direction */
2182259412Sluigistatic int
2183259412Sluiginetmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
2184259412Sluigi{
2185259412Sluigi	struct netmap_bwrap_adapter *bna =
2186259412Sluigi		(struct netmap_bwrap_adapter *)na;
2187259412Sluigi	struct netmap_adapter *hwna = bna->hwna;
2188259412Sluigi	struct netmap_kring *kring, *hw_kring;
2189259412Sluigi	struct netmap_ring *ring;
2190260368Sluigi	u_int lim;
2191259412Sluigi	int error = 0;
2192259412Sluigi
2193259412Sluigi	if (tx == NR_TX)
2194260700Sluigi	        return EINVAL;
2195259412Sluigi
2196259412Sluigi	kring = &na->rx_rings[ring_n];
2197259412Sluigi	hw_kring = &hwna->tx_rings[ring_n];
2198259412Sluigi	ring = kring->ring;
2199259412Sluigi	lim = kring->nkr_num_slots - 1;
2200259412Sluigi
2201270063Sluigi	if (!nm_netmap_on(hwna))
2202259412Sluigi		return 0;
2203261909Sluigi	mtx_lock(&kring->q_lock);
2204260368Sluigi	/* first step: simulate a user wakeup on the rx ring */
2205270063Sluigi	netmap_vp_rxsync_locked(kring, flags);
2206260368Sluigi	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2207270063Sluigi		na->name, ring_n,
2208260368Sluigi		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
2209260368Sluigi		ring->head, ring->cur, ring->tail,
2210260368Sluigi		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
2211260368Sluigi	/* second step: the simulated user consumes all new packets */
2212260368Sluigi	ring->head = ring->cur = ring->tail;
2213260368Sluigi
2214260368Sluigi	/* third step: the new packets are sent on the tx ring
2215260368Sluigi	 * (which is actually the same ring)
2216260368Sluigi	 */
2217260368Sluigi	/* set tail to what the hw expects */
2218260368Sluigi	ring->tail = hw_kring->rtail;
2219261909Sluigi	nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
2220261909Sluigi	error = hw_kring->nm_sync(hw_kring, flags);
2221260368Sluigi
2222260368Sluigi	/* fourth step: now we are back the rx ring */
2223260368Sluigi	/* claim ownership on all hw owned bufs */
2224260368Sluigi	ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */
2225260368Sluigi	ring->tail = kring->rtail; /* restore saved value of tail, for safety */
2226260368Sluigi
2227260368Sluigi	/* fifth step: the user goes to sleep again, causing another rxsync */
2228270063Sluigi	netmap_vp_rxsync_locked(kring, flags);
2229260368Sluigi	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2230270063Sluigi		na->name, ring_n,
2231260368Sluigi		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
2232260368Sluigi		ring->head, ring->cur, ring->tail,
2233260368Sluigi		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
2234261909Sluigi	mtx_unlock(&kring->q_lock);
2235259412Sluigi	return error;
2236259412Sluigi}
2237259412Sluigi
2238260368Sluigi
2239270063Sluigi/* notify method for the bridge-->host-rings path */
2240259412Sluigistatic int
2241259412Sluiginetmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
2242259412Sluigi{
2243259412Sluigi	struct netmap_bwrap_adapter *bna = na->na_private;
2244259412Sluigi	struct netmap_adapter *port_na = &bna->up.up;
2245259412Sluigi	if (tx == NR_TX || ring_n != 0)
2246260700Sluigi		return EINVAL;
2247259412Sluigi	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
2248259412Sluigi}
2249259412Sluigi
2250260368Sluigi
2251270063Sluigi/* nm_bdg_ctl callback for the bwrap.
2252270063Sluigi * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
2253270063Sluigi * On attach, it needs to provide a fake netmap_priv_d structure and
2254270063Sluigi * perform a netmap_do_regif() on the bwrap. This will put both the
2255270063Sluigi * bwrap and the hwna in netmap mode, with the netmap rings shared
2256270063Sluigi * and cross linked. Moroever, it will start intercepting interrupts
2257270063Sluigi * directed to hwna.
2258270063Sluigi */
2259259412Sluigistatic int
2260270063Sluiginetmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
2261259412Sluigi{
2262270063Sluigi	struct netmap_priv_d *npriv;
2263270063Sluigi	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
2264270063Sluigi	struct netmap_if *nifp;
2265270063Sluigi	int error = 0;
2266270063Sluigi
2267270063Sluigi	if (attach) {
2268270063Sluigi		if (NETMAP_OWNED_BY_ANY(na)) {
2269270063Sluigi			return EBUSY;
2270270063Sluigi		}
2271270063Sluigi		if (bna->na_kpriv) {
2272270063Sluigi			/* nothing to do */
2273270063Sluigi			return 0;
2274270063Sluigi		}
2275270063Sluigi		npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
2276270063Sluigi		if (npriv == NULL)
2277270063Sluigi			return ENOMEM;
2278270063Sluigi		nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error);
2279270063Sluigi		if (!nifp) {
2280270063Sluigi			bzero(npriv, sizeof(*npriv));
2281270063Sluigi			free(npriv, M_DEVBUF);
2282270063Sluigi			return error;
2283270063Sluigi		}
2284270063Sluigi		bna->na_kpriv = npriv;
2285270063Sluigi		na->na_flags |= NAF_BUSY;
2286270063Sluigi	} else {
2287270063Sluigi		int last_instance;
2288270063Sluigi
2289270063Sluigi		if (na->active_fds == 0) /* not registered */
2290270063Sluigi			return EINVAL;
2291270063Sluigi		last_instance = netmap_dtor_locked(bna->na_kpriv);
2292270063Sluigi		if (!last_instance) {
2293270063Sluigi			D("--- error, trying to detach an entry with active mmaps");
2294270063Sluigi			error = EINVAL;
2295270063Sluigi		} else {
2296270063Sluigi			struct nm_bridge *b = bna->up.na_bdg,
2297270063Sluigi				*bh = bna->host.na_bdg;
2298270063Sluigi			npriv = bna->na_kpriv;
2299270063Sluigi			bna->na_kpriv = NULL;
2300270063Sluigi			D("deleting priv");
2301270063Sluigi
2302270063Sluigi			bzero(npriv, sizeof(*npriv));
2303270063Sluigi			free(npriv, M_DEVBUF);
2304270063Sluigi			if (b) {
2305270063Sluigi				/* XXX the bwrap dtor should take care
2306270063Sluigi				 * of this (2014-06-16)
2307270063Sluigi				 */
2308270063Sluigi				netmap_bdg_detach_common(b, bna->up.bdg_port,
2309270063Sluigi				    (bh ? bna->host.bdg_port : -1));
2310270063Sluigi			}
2311270063Sluigi			na->na_flags &= ~NAF_BUSY;
2312270063Sluigi		}
2313270063Sluigi	}
2314270063Sluigi	return error;
2315270063Sluigi
2316270063Sluigi}
2317270063Sluigi
2318270063Sluigi/* attach a bridge wrapper to the 'real' device */
2319270063Sluigiint
2320270063Sluiginetmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
2321270063Sluigi{
2322259412Sluigi	struct netmap_bwrap_adapter *bna;
2323270063Sluigi	struct netmap_adapter *na = NULL;
2324270063Sluigi	struct netmap_adapter *hostna = NULL;
2325270063Sluigi	int error = 0;
2326259412Sluigi
2327270063Sluigi	/* make sure the NIC is not already in use */
2328270063Sluigi	if (NETMAP_OWNED_BY_ANY(hwna)) {
2329270063Sluigi		D("NIC %s busy, cannot attach to bridge", hwna->name);
2330270063Sluigi		return EBUSY;
2331270063Sluigi	}
2332259412Sluigi
2333259412Sluigi	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
2334270063Sluigi	if (bna == NULL) {
2335259412Sluigi		return ENOMEM;
2336270063Sluigi	}
2337259412Sluigi
2338259412Sluigi	na = &bna->up.up;
2339270063Sluigi	strncpy(na->name, nr_name, sizeof(na->name));
2340259412Sluigi	/* fill the ring data for the bwrap adapter with rx/tx meanings
2341259412Sluigi	 * swapped. The real cross-linking will be done during register,
2342259412Sluigi	 * when all the krings will have been created.
2343259412Sluigi	 */
2344259412Sluigi	na->num_rx_rings = hwna->num_tx_rings;
2345259412Sluigi	na->num_tx_rings = hwna->num_rx_rings;
2346259412Sluigi	na->num_tx_desc = hwna->num_rx_desc;
2347259412Sluigi	na->num_rx_desc = hwna->num_tx_desc;
2348259412Sluigi	na->nm_dtor = netmap_bwrap_dtor;
2349259412Sluigi	na->nm_register = netmap_bwrap_register;
2350259412Sluigi	// na->nm_txsync = netmap_bwrap_txsync;
2351259412Sluigi	// na->nm_rxsync = netmap_bwrap_rxsync;
2352259412Sluigi	na->nm_config = netmap_bwrap_config;
2353259412Sluigi	na->nm_krings_create = netmap_bwrap_krings_create;
2354259412Sluigi	na->nm_krings_delete = netmap_bwrap_krings_delete;
2355259412Sluigi	na->nm_notify = netmap_bwrap_notify;
2356270063Sluigi	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
2357270063Sluigi	na->pdev = hwna->pdev;
2358270063Sluigi	na->nm_mem = netmap_mem_private_new(na->name,
2359270063Sluigi			na->num_tx_rings, na->num_tx_desc,
2360270063Sluigi			na->num_rx_rings, na->num_rx_desc,
2361270063Sluigi			0, 0, &error);
2362270063Sluigi	na->na_flags |= NAF_MEM_OWNER;
2363270063Sluigi	if (na->nm_mem == NULL)
2364270063Sluigi		goto err_put;
2365259412Sluigi	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
2366259412Sluigi
2367259412Sluigi	bna->hwna = hwna;
2368259412Sluigi	netmap_adapter_get(hwna);
2369259412Sluigi	hwna->na_private = bna; /* weak reference */
2370270063Sluigi	hwna->na_vp = &bna->up;
2371270063Sluigi
2372261909Sluigi	if (hwna->na_flags & NAF_HOST_RINGS) {
2373270063Sluigi		if (hwna->na_flags & NAF_SW_ONLY)
2374270063Sluigi			na->na_flags |= NAF_SW_ONLY;
2375261909Sluigi		na->na_flags |= NAF_HOST_RINGS;
2376261909Sluigi		hostna = &bna->host.up;
2377270063Sluigi		snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name);
2378261909Sluigi		hostna->ifp = hwna->ifp;
2379261909Sluigi		hostna->num_tx_rings = 1;
2380261909Sluigi		hostna->num_tx_desc = hwna->num_rx_desc;
2381261909Sluigi		hostna->num_rx_rings = 1;
2382261909Sluigi		hostna->num_rx_desc = hwna->num_tx_desc;
2383261909Sluigi		// hostna->nm_txsync = netmap_bwrap_host_txsync;
2384261909Sluigi		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
2385261909Sluigi		hostna->nm_notify = netmap_bwrap_host_notify;
2386261909Sluigi		hostna->nm_mem = na->nm_mem;
2387261909Sluigi		hostna->na_private = bna;
2388270063Sluigi		hostna->na_vp = &bna->up;
2389270063Sluigi		na->na_hostvp = hwna->na_hostvp =
2390270063Sluigi			hostna->na_hostvp = &bna->host;
2391270063Sluigi		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
2392261909Sluigi	}
2393259412Sluigi
2394260368Sluigi	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
2395270063Sluigi		na->name, ifp->if_xname,
2396259412Sluigi		na->num_tx_rings, na->num_tx_desc,
2397259412Sluigi		na->num_rx_rings, na->num_rx_desc);
2398259412Sluigi
2399259412Sluigi	error = netmap_attach_common(na);
2400259412Sluigi	if (error) {
2401270063Sluigi		goto err_free;
2402259412Sluigi	}
2403270063Sluigi	/* make bwrap ifp point to the real ifp
2404270063Sluigi	 * NOTE: netmap_attach_common() interprets a non-NULL na->ifp
2405270063Sluigi	 * as a request to make the ifp point to the na. Since we
2406270063Sluigi	 * do not want to change the na already pointed to by hwna->ifp,
2407270063Sluigi	 * the following assignment has to be delayed until now
2408270063Sluigi	 */
2409270063Sluigi	na->ifp = hwna->ifp;
2410270063Sluigi	hwna->na_flags |= NAF_BUSY;
2411270063Sluigi	/* make hwna point to the allocator we are actually using,
2412270063Sluigi	 * so that monitors will be able to find it
2413270063Sluigi	 */
2414270063Sluigi	bna->save_nmd = hwna->nm_mem;
2415270063Sluigi	hwna->nm_mem = na->nm_mem;
2416259412Sluigi	return 0;
2417270063Sluigi
2418270063Sluigierr_free:
2419270063Sluigi	netmap_mem_private_delete(na->nm_mem);
2420270063Sluigierr_put:
2421270063Sluigi	hwna->na_vp = hwna->na_hostvp = NULL;
2422270063Sluigi	netmap_adapter_put(hwna);
2423270063Sluigi	free(bna, M_DEVBUF);
2424270063Sluigi	return error;
2425270063Sluigi
2426259412Sluigi}
2427259412Sluigi
2428260368Sluigi
2429259412Sluigivoid
2430259412Sluiginetmap_init_bridges(void)
2431259412Sluigi{
2432259412Sluigi	int i;
2433259412Sluigi	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
2434259412Sluigi	for (i = 0; i < NM_BRIDGES; i++)
2435259412Sluigi		BDG_RWINIT(&nm_bridges[i]);
2436259412Sluigi}
2437259412Sluigi#endif /* WITH_VALE */
2438