1262152Sluigi/*
2262152Sluigi * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
3262152Sluigi *
4262152Sluigi * Redistribution and use in source and binary forms, with or without
5262152Sluigi * modification, are permitted provided that the following conditions
6262152Sluigi * are met:
7262152Sluigi *   1. Redistributions of source code must retain the above copyright
8262152Sluigi *      notice, this list of conditions and the following disclaimer.
9262152Sluigi *   2. Redistributions in binary form must reproduce the above copyright
10262152Sluigi *      notice, this list of conditions and the following disclaimer in the
11262152Sluigi *      documentation and/or other materials provided with the distribution.
12262152Sluigi *
13262152Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14262152Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15262152Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16262152Sluigi * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17262152Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18262152Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19262152Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20262152Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21262152Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22262152Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23262152Sluigi * SUCH DAMAGE.
24262152Sluigi */
25262152Sluigi
26262152Sluigi
27262152Sluigi/*
28262152Sluigi * This module implements the VALE switch for netmap
29262152Sluigi
30262152Sluigi--- VALE SWITCH ---
31262152Sluigi
32262152SluigiNMG_LOCK() serializes all modifications to switches and ports.
33262152SluigiA switch cannot be deleted until all ports are gone.
34262152Sluigi
For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring a new port or deleting an
existing one, the lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.
42262152Sluigi
On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch.)
49262152Sluigi
50262152Sluigi */
51262152Sluigi
52262152Sluigi/*
53262152Sluigi * OS-specific code that is used only within this file.
54262152Sluigi * Other OS-specific code that must be accessed by drivers
55262152Sluigi * is present in netmap_kern.h
56262152Sluigi */
57262152Sluigi
58262152Sluigi#if defined(__FreeBSD__)
59262152Sluigi#include <sys/cdefs.h> /* prerequisite */
60262152Sluigi__FBSDID("$FreeBSD$");
61262152Sluigi
62262152Sluigi#include <sys/types.h>
63262152Sluigi#include <sys/errno.h>
64262152Sluigi#include <sys/param.h>	/* defines used in kernel.h */
65262152Sluigi#include <sys/kernel.h>	/* types used in module initialization */
66262152Sluigi#include <sys/conf.h>	/* cdevsw struct, UID, GID */
67262152Sluigi#include <sys/sockio.h>
68262152Sluigi#include <sys/socketvar.h>	/* struct socket */
69262152Sluigi#include <sys/malloc.h>
70262152Sluigi#include <sys/poll.h>
71262152Sluigi#include <sys/rwlock.h>
72262152Sluigi#include <sys/socket.h> /* sockaddrs */
73262152Sluigi#include <sys/selinfo.h>
74262152Sluigi#include <sys/sysctl.h>
75262152Sluigi#include <net/if.h>
76262152Sluigi#include <net/if_var.h>
77262152Sluigi#include <net/bpf.h>		/* BIOCIMMEDIATE */
78262152Sluigi#include <machine/bus.h>	/* bus_dmamap_* */
79262152Sluigi#include <sys/endian.h>
80262152Sluigi#include <sys/refcount.h>
81262152Sluigi
82262152Sluigi
/*
 * Per-bridge lock type; struct nm_bridge embeds one as bdg_lock.
 * NOTE(review): the module comment at the top of this file talks about
 * an SX lock and sleepable shared locks, while these macros use
 * struct rwlock -- confirm this is intended.
 */
#define BDG_RWLOCK_T		struct rwlock // struct rwlock

/* Thin wrappers around the rw_*() primitives, all acting on (b)->bdg_lock. */
#define	BDG_RWINIT(b)		\
	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
#define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
#define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
#define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
#define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
#define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
#define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)
93262152Sluigi
94262152Sluigi
95262152Sluigi#elif defined(linux)
96262152Sluigi
97262152Sluigi#include "bsd_glue.h"
98262152Sluigi
99262152Sluigi#elif defined(__APPLE__)
100262152Sluigi
101262152Sluigi#warning OSX support is only partial
102262152Sluigi#include "osx_glue.h"
103262152Sluigi
104262152Sluigi#else
105262152Sluigi
106262152Sluigi#error	Unsupported platform
107262152Sluigi
108262152Sluigi#endif /* unsupported */
109262152Sluigi
110262152Sluigi/*
111262152Sluigi * common headers
112262152Sluigi */
113262152Sluigi
114262152Sluigi#include <net/netmap.h>
115262152Sluigi#include <dev/netmap/netmap_kern.h>
116262152Sluigi#include <dev/netmap/netmap_mem2.h>
117262152Sluigi
118262152Sluigi#ifdef WITH_VALE
119262152Sluigi
120262152Sluigi/*
121262152Sluigi * system parameters (most of them in netmap_kern.h)
122262152Sluigi * NM_NAME	prefix for switch port names, default "vale"
123262152Sluigi * NM_BDG_MAXPORTS	number of ports
124262152Sluigi * NM_BRIDGES	max number of switches in the system.
125262152Sluigi *	XXX should become a sysctl or tunable
126262152Sluigi *
127262152Sluigi * Switch ports are named valeX:Y where X is the switch name and Y
128262152Sluigi * is the port. If Y matches a physical interface name, the port is
129262152Sluigi * connected to a physical device.
130262152Sluigi *
131262152Sluigi * Unlike physical interfaces, switch ports use their own memory region
132262152Sluigi * for rings and buffers.
133262152Sluigi * The virtual interfaces use per-queue lock instead of core lock.
134262152Sluigi * In the tx loop, we aggregate traffic in batches to make all operations
135262152Sluigi * faster. The batch size is bridge_batch.
136262152Sluigi */
/* Compile-time sizing of the switch; see the individual notes below. */
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
#define NM_BDG_HASH		1024	/* forwarding table entries */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */
/* actual size of the tables (used to size the per-ring allocation
 * in nm_alloc_bdgfwd())
 */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
/* NM_FT_NULL terminates a list of slots in the ft; it is also the
 * initial head/tail value of every destination queue.
 */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
#define	NM_BRIDGES		8	/* number of bridges */
148262152Sluigi
149262152Sluigi
150262152Sluigi/*
151262152Sluigi * bridge_batch is set via sysctl to the max batch size to be
152262152Sluigi * used in the bridge. The actual value may be larger as the
153262152Sluigi * last packet in the block may overflow the size.
154262152Sluigi */
/* Defaults to NM_BDG_BATCH; exported read-write as an integer sysctl
 * named bridge_batch under the _dev_netmap node (no description string).
 */
int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
158262152Sluigi
159262152Sluigi
/*
 * Forward declarations of file-local (static) functions that are
 * referenced before their definitions.
 */
static int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **);
static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
163262152Sluigi
/*
 * For each output interface, nm_bdg_q is used to construct a list.
 * bq_len is the number of output buffers (we can have coalescing
 * during the copy).
 *
 * bq_head and bq_tail are initialized to NM_FT_NULL, and bq_len to 0,
 * when the per-ring tables are allocated in nm_alloc_bdgfwd().
 */
struct nm_bdg_q {
	uint16_t bq_head;	/* NM_FT_NULL when the list is empty */
	uint16_t bq_tail;	/* NM_FT_NULL when the list is empty */
	uint32_t bq_len;	/* number of buffers */
};
174262152Sluigi
/*
 * One entry of the per-bridge forwarding table (the ht[] array in
 * struct nm_bridge).
 * XXX revise this
 */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;
};
180262152Sluigi
/*
 * nm_bridge is a descriptor for a VALE switch.
 * Interfaces for a bridge are all in bdg_ports[].
 * The array has fixed size, an empty entry does not terminate
 * the search, but lookups only occur on attach/detach so we
 * don't mind if they are slow.
 *
 * The bridge is non blocking on the transmit ports: excess
 * packets are dropped if there is no room on the output port.
 *
 * bdg_lock protects accesses to the bdg_ports array.
 * This is a rw lock (or equivalent).
 */
struct nm_bridge {
	/* XXX what is the proper alignment/layout ? */
	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
	int		bdg_namelen;	/* number of chars of bdg_basename in use */
	uint32_t	bdg_active_ports; /* 0 means free */
	char		bdg_basename[IFNAMSIZ];	/* switch name, set by nm_find_bridge() */

	/* Indexes of active ports (up to active_ports)
	 * and all other remaining ports.
	 */
	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];

	/* port adapters, indexed by the values stored in bdg_port_index[] */
	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];


	/*
	 * Per-bridge callbacks.
	 *
	 * The lookup callback is the function to decide the destination port.
	 * It returns either of an index of the destination port,
	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
	 * forward this packet.  ring_nr is the source ring index, and the
	 * function may overwrite this value to forward this packet to a
	 * different ring index.
	 *
	 * In this file, lookup is set to netmap_bdg_learning when the bridge
	 * is created, dtor (when non-NULL) is invoked on a port that is being
	 * detached, and the whole structure is zeroed when the last port
	 * leaves the bridge.
	 */
	struct netmap_bdg_ops bdg_ops;

	/* the forwarding table, MAC+ports.
	 * XXX should be changed to an argument to be passed to
	 * the lookup function, and allocated on attach
	 */
	struct nm_hash_ent ht[NM_BDG_HASH];
};
226262152Sluigi
227270252Sluigiconst char*
228270252Sluiginetmap_bdg_name(struct netmap_vp_adapter *vp)
229270252Sluigi{
230270252Sluigi	struct nm_bridge *b = vp->na_bdg;
231270252Sluigi	if (b == NULL)
232270252Sluigi		return NULL;
233270252Sluigi	return b->bdg_basename;
234270252Sluigi}
235262152Sluigi
236270252Sluigi
/*
 * XXX in principle nm_bridges could be created dynamically
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 *
 * nm_find_bridge() scans this array linearly; an entry whose
 * bdg_active_ports is 0 is considered free.
 */
struct nm_bridge nm_bridges[NM_BRIDGES];
243262152Sluigi
244262152Sluigi
/*
 * Slightly optimized packet copy. Lengths below 1024 bytes are copied
 * in whole 64-byte chunks (eight 64-bit words per iteration), so the
 * length is effectively rounded up to a multiple of 64; larger lengths
 * are handed to memcpy() unchanged. The caller must guarantee that both
 * buffers have room for the rounded-up length.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *from = _src;
	uint64_t *to = _dst;

	if (unlikely(l >= 1024)) {
		memcpy(to, from, l);
		return;
	}
	while (likely(l > 0)) {
		/* one 64-byte chunk */
		to[0] = from[0];
		to[1] = from[1];
		to[2] = from[2];
		to[3] = from[3];
		to[4] = from[4];
		to[5] = from[5];
		to[6] = from[6];
		to[7] = from[7];
		from += 8;
		to += 8;
		l -= 64;
	}
}
273262152Sluigi
274262152Sluigi
275262152Sluigi/*
276262152Sluigi * locate a bridge among the existing ones.
277262152Sluigi * MUST BE CALLED WITH NMG_LOCK()
278262152Sluigi *
279262152Sluigi * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
280262152Sluigi * We assume that this is called with a name of at least NM_NAME chars.
281262152Sluigi */
282262152Sluigistatic struct nm_bridge *
283262152Sluiginm_find_bridge(const char *name, int create)
284262152Sluigi{
285262152Sluigi	int i, l, namelen;
286262152Sluigi	struct nm_bridge *b = NULL;
287262152Sluigi
288262152Sluigi	NMG_LOCK_ASSERT();
289262152Sluigi
290262152Sluigi	namelen = strlen(NM_NAME);	/* base length */
291262152Sluigi	l = name ? strlen(name) : 0;		/* actual length */
292262152Sluigi	if (l < namelen) {
293262152Sluigi		D("invalid bridge name %s", name ? name : NULL);
294262152Sluigi		return NULL;
295262152Sluigi	}
296262152Sluigi	for (i = namelen + 1; i < l; i++) {
297262152Sluigi		if (name[i] == ':') {
298262152Sluigi			namelen = i;
299262152Sluigi			break;
300262152Sluigi		}
301262152Sluigi	}
302262152Sluigi	if (namelen >= IFNAMSIZ)
303262152Sluigi		namelen = IFNAMSIZ;
304262152Sluigi	ND("--- prefix is '%.*s' ---", namelen, name);
305262152Sluigi
306262152Sluigi	/* lookup the name, remember empty slot if there is one */
307262152Sluigi	for (i = 0; i < NM_BRIDGES; i++) {
308262152Sluigi		struct nm_bridge *x = nm_bridges + i;
309262152Sluigi
310262152Sluigi		if (x->bdg_active_ports == 0) {
311262152Sluigi			if (create && b == NULL)
312262152Sluigi				b = x;	/* record empty slot */
313262152Sluigi		} else if (x->bdg_namelen != namelen) {
314262152Sluigi			continue;
315262152Sluigi		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
316262152Sluigi			ND("found '%.*s' at %d", namelen, name, i);
317262152Sluigi			b = x;
318262152Sluigi			break;
319262152Sluigi		}
320262152Sluigi	}
321262152Sluigi	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
322262152Sluigi		/* initialize the bridge */
323262152Sluigi		strncpy(b->bdg_basename, name, namelen);
324262152Sluigi		ND("create new bridge %s with ports %d", b->bdg_basename,
325262152Sluigi			b->bdg_active_ports);
326262152Sluigi		b->bdg_namelen = namelen;
327262152Sluigi		b->bdg_active_ports = 0;
328262152Sluigi		for (i = 0; i < NM_BDG_MAXPORTS; i++)
329262152Sluigi			b->bdg_port_index[i] = i;
330262152Sluigi		/* set the default function */
331270252Sluigi		b->bdg_ops.lookup = netmap_bdg_learning;
332262152Sluigi		/* reset the MAC address table */
333262152Sluigi		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
334262152Sluigi	}
335262152Sluigi	return b;
336262152Sluigi}
337262152Sluigi
338262152Sluigi
339262152Sluigi/*
340262152Sluigi * Free the forwarding tables for rings attached to switch ports.
341262152Sluigi */
342262152Sluigistatic void
343262152Sluiginm_free_bdgfwd(struct netmap_adapter *na)
344262152Sluigi{
345262152Sluigi	int nrings, i;
346262152Sluigi	struct netmap_kring *kring;
347262152Sluigi
348262152Sluigi	NMG_LOCK_ASSERT();
349262152Sluigi	nrings = na->num_tx_rings;
350262152Sluigi	kring = na->tx_rings;
351262152Sluigi	for (i = 0; i < nrings; i++) {
352262152Sluigi		if (kring[i].nkr_ft) {
353262152Sluigi			free(kring[i].nkr_ft, M_DEVBUF);
354262152Sluigi			kring[i].nkr_ft = NULL; /* protect from freeing twice */
355262152Sluigi		}
356262152Sluigi	}
357262152Sluigi}
358262152Sluigi
359262152Sluigi
360262152Sluigi/*
361262152Sluigi * Allocate the forwarding tables for the rings attached to the bridge ports.
362262152Sluigi */
363262152Sluigistatic int
364262152Sluiginm_alloc_bdgfwd(struct netmap_adapter *na)
365262152Sluigi{
366262152Sluigi	int nrings, l, i, num_dstq;
367262152Sluigi	struct netmap_kring *kring;
368262152Sluigi
369262152Sluigi	NMG_LOCK_ASSERT();
370262152Sluigi	/* all port:rings + broadcast */
371262152Sluigi	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
372262152Sluigi	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
373262152Sluigi	l += sizeof(struct nm_bdg_q) * num_dstq;
374262152Sluigi	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
375262152Sluigi
376262152Sluigi	nrings = netmap_real_tx_rings(na);
377262152Sluigi	kring = na->tx_rings;
378262152Sluigi	for (i = 0; i < nrings; i++) {
379262152Sluigi		struct nm_bdg_fwd *ft;
380262152Sluigi		struct nm_bdg_q *dstq;
381262152Sluigi		int j;
382262152Sluigi
383262152Sluigi		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
384262152Sluigi		if (!ft) {
385262152Sluigi			nm_free_bdgfwd(na);
386262152Sluigi			return ENOMEM;
387262152Sluigi		}
388262152Sluigi		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
389262152Sluigi		for (j = 0; j < num_dstq; j++) {
390262152Sluigi			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
391262152Sluigi			dstq[j].bq_len = 0;
392262152Sluigi		}
393262152Sluigi		kring[i].nkr_ft = ft;
394262152Sluigi	}
395262152Sluigi	return 0;
396262152Sluigi}
397262152Sluigi
398262152Sluigi
399270252Sluigi/* remove from bridge b the ports in slots hw and sw
400270252Sluigi * (sw can be -1 if not needed)
401270252Sluigi */
402262152Sluigistatic void
403262152Sluiginetmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
404262152Sluigi{
405262152Sluigi	int s_hw = hw, s_sw = sw;
406262152Sluigi	int i, lim =b->bdg_active_ports;
407262152Sluigi	uint8_t tmp[NM_BDG_MAXPORTS];
408262152Sluigi
409262152Sluigi	/*
410262152Sluigi	New algorithm:
411262152Sluigi	make a copy of bdg_port_index;
412262152Sluigi	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
413262152Sluigi	in the array of bdg_port_index, replacing them with
414262152Sluigi	entries from the bottom of the array;
415262152Sluigi	decrement bdg_active_ports;
416262152Sluigi	acquire BDG_WLOCK() and copy back the array.
417262152Sluigi	 */
418262152Sluigi
419262152Sluigi	if (netmap_verbose)
420262152Sluigi		D("detach %d and %d (lim %d)", hw, sw, lim);
421262152Sluigi	/* make a copy of the list of active ports, update it,
422262152Sluigi	 * and then copy back within BDG_WLOCK().
423262152Sluigi	 */
424262152Sluigi	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
425262152Sluigi	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
426262152Sluigi		if (hw >= 0 && tmp[i] == hw) {
427262152Sluigi			ND("detach hw %d at %d", hw, i);
428262152Sluigi			lim--; /* point to last active port */
429262152Sluigi			tmp[i] = tmp[lim]; /* swap with i */
430262152Sluigi			tmp[lim] = hw;	/* now this is inactive */
431262152Sluigi			hw = -1;
432262152Sluigi		} else if (sw >= 0 && tmp[i] == sw) {
433262152Sluigi			ND("detach sw %d at %d", sw, i);
434262152Sluigi			lim--;
435262152Sluigi			tmp[i] = tmp[lim];
436262152Sluigi			tmp[lim] = sw;
437262152Sluigi			sw = -1;
438262152Sluigi		} else {
439262152Sluigi			i++;
440262152Sluigi		}
441262152Sluigi	}
442262152Sluigi	if (hw >= 0 || sw >= 0) {
443262152Sluigi		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
444262152Sluigi	}
445262152Sluigi
446262152Sluigi	BDG_WLOCK(b);
447270252Sluigi	if (b->bdg_ops.dtor)
448270252Sluigi		b->bdg_ops.dtor(b->bdg_ports[s_hw]);
449262152Sluigi	b->bdg_ports[s_hw] = NULL;
450262152Sluigi	if (s_sw >= 0) {
451262152Sluigi		b->bdg_ports[s_sw] = NULL;
452262152Sluigi	}
453262152Sluigi	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
454262152Sluigi	b->bdg_active_ports = lim;
455262152Sluigi	BDG_WUNLOCK(b);
456262152Sluigi
457262152Sluigi	ND("now %d active ports", lim);
458262152Sluigi	if (lim == 0) {
459262152Sluigi		ND("marking bridge %s as free", b->bdg_basename);
460270252Sluigi		bzero(&b->bdg_ops, sizeof(b->bdg_ops));
461262152Sluigi	}
462262152Sluigi}
463262152Sluigi
464270252Sluigi/* nm_bdg_ctl callback for VALE ports */
465270252Sluigistatic int
466270252Sluiginetmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
467270252Sluigi{
468270252Sluigi	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
469270252Sluigi	struct nm_bridge *b = vpna->na_bdg;
470262152Sluigi
471270252Sluigi	if (attach)
472270252Sluigi		return 0; /* nothing to do */
473270252Sluigi	if (b) {
474270252Sluigi		netmap_set_all_rings(na, 0 /* disable */);
475270252Sluigi		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
476270252Sluigi		vpna->na_bdg = NULL;
477270252Sluigi		netmap_set_all_rings(na, 1 /* enable */);
478270252Sluigi	}
479270252Sluigi	/* I have took reference just for attach */
480270252Sluigi	netmap_adapter_put(na);
481270252Sluigi	return 0;
482270252Sluigi}
483270252Sluigi
484270252Sluigi/* nm_dtor callback for ephemeral VALE ports */
485262152Sluigistatic void
486270252Sluiginetmap_vp_dtor(struct netmap_adapter *na)
487262152Sluigi{
488262152Sluigi	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
489262152Sluigi	struct nm_bridge *b = vpna->na_bdg;
490262152Sluigi
491270252Sluigi	ND("%s has %d references", na->name, na->na_refcount);
492262152Sluigi
493262152Sluigi	if (b) {
494262152Sluigi		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
495262152Sluigi	}
496270252Sluigi}
497262152Sluigi
498270252Sluigi/* nm_dtor callback for persistent VALE ports */
499270252Sluigistatic void
500270252Sluiginetmap_persist_vp_dtor(struct netmap_adapter *na)
501270252Sluigi{
502270252Sluigi	struct ifnet *ifp = na->ifp;
503270252Sluigi
504270252Sluigi	netmap_vp_dtor(na);
505262152Sluigi	na->ifp = NULL;
506270252Sluigi	nm_vi_detach(ifp);
507262152Sluigi}
508262152Sluigi
509270252Sluigi/* remove a persistent VALE port from the system */
510270252Sluigistatic int
511270252Sluiginm_vi_destroy(const char *name)
512270252Sluigi{
513270252Sluigi	struct ifnet *ifp;
514270252Sluigi	int error;
515262152Sluigi
516270252Sluigi	ifp = ifunit_ref(name);
517270252Sluigi	if (!ifp)
518270252Sluigi		return ENXIO;
519270252Sluigi	NMG_LOCK();
520270252Sluigi	/* make sure this is actually a VALE port */
521270252Sluigi	if (!NETMAP_CAPABLE(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
522270252Sluigi		error = EINVAL;
523270252Sluigi		goto err;
524270252Sluigi	}
525270252Sluigi
526270252Sluigi	if (NA(ifp)->na_refcount > 1) {
527270252Sluigi		error = EBUSY;
528270252Sluigi		goto err;
529270252Sluigi	}
530270252Sluigi	NMG_UNLOCK();
531270252Sluigi
532270252Sluigi	D("destroying a persistent vale interface %s", ifp->if_xname);
533270252Sluigi	/* Linux requires all the references are released
534270252Sluigi	 * before unregister
535270252Sluigi	 */
536270252Sluigi	if_rele(ifp);
537270252Sluigi	netmap_detach(ifp);
538270252Sluigi	return 0;
539270252Sluigi
540270252Sluigierr:
541270252Sluigi	NMG_UNLOCK();
542270252Sluigi	if_rele(ifp);
543270252Sluigi	return error;
544270252Sluigi}
545270252Sluigi
546270252Sluigi/*
547270252Sluigi * Create a virtual interface registered to the system.
548270252Sluigi * The interface will be attached to a bridge later.
549270252Sluigi */
550270252Sluigistatic int
551270252Sluiginm_vi_create(struct nmreq *nmr)
552270252Sluigi{
553270252Sluigi	struct ifnet *ifp;
554270252Sluigi	struct netmap_vp_adapter *vpna;
555270252Sluigi	int error;
556270252Sluigi
557270252Sluigi	/* don't include VALE prefix */
558270252Sluigi	if (!strncmp(nmr->nr_name, NM_NAME, strlen(NM_NAME)))
559270252Sluigi		return EINVAL;
560270252Sluigi	ifp = ifunit_ref(nmr->nr_name);
561270252Sluigi	if (ifp) { /* already exist, cannot create new one */
562270252Sluigi		if_rele(ifp);
563270252Sluigi		return EEXIST;
564270252Sluigi	}
565270252Sluigi	error = nm_vi_persist(nmr->nr_name, &ifp);
566270252Sluigi	if (error)
567270252Sluigi		return error;
568270252Sluigi
569270252Sluigi	NMG_LOCK();
570270252Sluigi	/* netmap_vp_create creates a struct netmap_vp_adapter */
571270252Sluigi	error = netmap_vp_create(nmr, ifp, &vpna);
572270252Sluigi	if (error) {
573270252Sluigi		D("error %d", error);
574270252Sluigi		nm_vi_detach(ifp);
575270252Sluigi		return error;
576270252Sluigi	}
577270252Sluigi	/* persist-specific routines */
578270252Sluigi	vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
579270252Sluigi	vpna->up.nm_dtor = netmap_persist_vp_dtor;
580270252Sluigi	netmap_adapter_get(&vpna->up);
581270252Sluigi	NMG_UNLOCK();
582270252Sluigi	D("created %s", ifp->if_xname);
583270252Sluigi	return 0;
584270252Sluigi}
585270252Sluigi
586262152Sluigi/* Try to get a reference to a netmap adapter attached to a VALE switch.
587262152Sluigi * If the adapter is found (or is created), this function returns 0, a
588262152Sluigi * non NULL pointer is returned into *na, and the caller holds a
589262152Sluigi * reference to the adapter.
590262152Sluigi * If an adapter is not found, then no reference is grabbed and the
591262152Sluigi * function returns an error code, or 0 if there is just a VALE prefix
592262152Sluigi * mismatch. Therefore the caller holds a reference when
593262152Sluigi * (*na != NULL && return == 0).
594262152Sluigi */
595262152Sluigiint
596262152Sluiginetmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
597262152Sluigi{
598270252Sluigi	char *nr_name = nmr->nr_name;
599270252Sluigi	const char *ifname;
600262152Sluigi	struct ifnet *ifp;
601262152Sluigi	int error = 0;
602270252Sluigi	struct netmap_vp_adapter *vpna, *hostna = NULL;
603262152Sluigi	struct nm_bridge *b;
604262152Sluigi	int i, j, cand = -1, cand2 = -1;
605262152Sluigi	int needed;
606262152Sluigi
607262152Sluigi	*na = NULL;     /* default return value */
608262152Sluigi
609262152Sluigi	/* first try to see if this is a bridge port. */
610262152Sluigi	NMG_LOCK_ASSERT();
611270252Sluigi	if (strncmp(nr_name, NM_NAME, sizeof(NM_NAME) - 1)) {
612262152Sluigi		return 0;  /* no error, but no VALE prefix */
613262152Sluigi	}
614262152Sluigi
615270252Sluigi	b = nm_find_bridge(nr_name, create);
616262152Sluigi	if (b == NULL) {
617270252Sluigi		D("no bridges available for '%s'", nr_name);
618262152Sluigi		return (create ? ENOMEM : ENXIO);
619262152Sluigi	}
620270252Sluigi	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
621270252Sluigi		panic("x");
622262152Sluigi
623262152Sluigi	/* Now we are sure that name starts with the bridge's name,
624262152Sluigi	 * lookup the port in the bridge. We need to scan the entire
625262152Sluigi	 * list. It is not important to hold a WLOCK on the bridge
626262152Sluigi	 * during the search because NMG_LOCK already guarantees
627262152Sluigi	 * that there are no other possible writers.
628262152Sluigi	 */
629262152Sluigi
630262152Sluigi	/* lookup in the local list of ports */
631262152Sluigi	for (j = 0; j < b->bdg_active_ports; j++) {
632262152Sluigi		i = b->bdg_port_index[j];
633262152Sluigi		vpna = b->bdg_ports[i];
634262152Sluigi		// KASSERT(na != NULL);
635270252Sluigi		D("checking %s", vpna->up.name);
636270252Sluigi		if (!strcmp(vpna->up.name, nr_name)) {
637262152Sluigi			netmap_adapter_get(&vpna->up);
638270252Sluigi			ND("found existing if %s refs %d", nr_name)
639270252Sluigi			*na = &vpna->up;
640262152Sluigi			return 0;
641262152Sluigi		}
642262152Sluigi	}
643262152Sluigi	/* not found, should we create it? */
644262152Sluigi	if (!create)
645262152Sluigi		return ENXIO;
646262152Sluigi	/* yes we should, see if we have space to attach entries */
647262152Sluigi	needed = 2; /* in some cases we only need 1 */
648262152Sluigi	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
649262152Sluigi		D("bridge full %d, cannot create new port", b->bdg_active_ports);
650262152Sluigi		return ENOMEM;
651262152Sluigi	}
652262152Sluigi	/* record the next two ports available, but do not allocate yet */
653262152Sluigi	cand = b->bdg_port_index[b->bdg_active_ports];
654262152Sluigi	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
655262152Sluigi	ND("+++ bridge %s port %s used %d avail %d %d",
656270252Sluigi		b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
657262152Sluigi
658262152Sluigi	/*
659262152Sluigi	 * try see if there is a matching NIC with this name
660262152Sluigi	 * (after the bridge's name)
661262152Sluigi	 */
662270252Sluigi	ifname = nr_name + b->bdg_namelen + 1;
663270252Sluigi	ifp = ifunit_ref(ifname);
664270252Sluigi	if (!ifp) {
665270252Sluigi		/* Create an ephemeral virtual port
666270252Sluigi		 * This block contains all the ephemeral-specific logics
667270252Sluigi		 */
668262152Sluigi		if (nmr->nr_cmd) {
669262152Sluigi			/* nr_cmd must be 0 for a virtual port */
670262152Sluigi			return EINVAL;
671262152Sluigi		}
672262152Sluigi
673262152Sluigi		/* bdg_netmap_attach creates a struct netmap_adapter */
674270252Sluigi		error = netmap_vp_create(nmr, NULL, &vpna);
675262152Sluigi		if (error) {
676262152Sluigi			D("error %d", error);
677262152Sluigi			free(ifp, M_DEVBUF);
678262152Sluigi			return error;
679262152Sluigi		}
680270252Sluigi		/* shortcut - we can skip get_hw_na(),
681270252Sluigi		 * ownership check and nm_bdg_attach()
682270252Sluigi		 */
683270252Sluigi	} else {
684270252Sluigi		struct netmap_adapter *hw;
685262152Sluigi
686270252Sluigi		error = netmap_get_hw_na(ifp, &hw);
687270252Sluigi		if (error || hw == NULL)
688262152Sluigi			goto out;
689262152Sluigi
690270252Sluigi		/* host adapter might not be created */
691270252Sluigi		error = hw->nm_bdg_attach(nr_name, hw);
692270252Sluigi		if (error)
693262152Sluigi			goto out;
694270252Sluigi		vpna = hw->na_vp;
695270252Sluigi		hostna = hw->na_hostvp;
696270252Sluigi		if_rele(ifp);
697262152Sluigi		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
698270252Sluigi			hostna = NULL;
699262152Sluigi	}
700262152Sluigi
701262152Sluigi	BDG_WLOCK(b);
702262152Sluigi	vpna->bdg_port = cand;
703262152Sluigi	ND("NIC  %p to bridge port %d", vpna, cand);
704262152Sluigi	/* bind the port to the bridge (virtual ports are not active) */
705262152Sluigi	b->bdg_ports[cand] = vpna;
706262152Sluigi	vpna->na_bdg = b;
707262152Sluigi	b->bdg_active_ports++;
708270252Sluigi	if (hostna != NULL) {
709262152Sluigi		/* also bind the host stack to the bridge */
710262152Sluigi		b->bdg_ports[cand2] = hostna;
711262152Sluigi		hostna->bdg_port = cand2;
712262152Sluigi		hostna->na_bdg = b;
713262152Sluigi		b->bdg_active_ports++;
714262152Sluigi		ND("host %p to bridge port %d", hostna, cand2);
715262152Sluigi	}
716270252Sluigi	ND("if %s refs %d", ifname, vpna->up.na_refcount);
717262152Sluigi	BDG_WUNLOCK(b);
718270252Sluigi	*na = &vpna->up;
719270252Sluigi	netmap_adapter_get(*na);
720262152Sluigi	return 0;
721262152Sluigi
722262152Sluigiout:
723262152Sluigi	if_rele(ifp);
724262152Sluigi
725262152Sluigi	return error;
726262152Sluigi}
727262152Sluigi
728262152Sluigi
729270252Sluigi/* Process NETMAP_BDG_ATTACH */
730262152Sluigistatic int
731270252Sluiginm_bdg_ctl_attach(struct nmreq *nmr)
732262152Sluigi{
733262152Sluigi	struct netmap_adapter *na;
734262152Sluigi	int error;
735262152Sluigi
736262152Sluigi	NMG_LOCK();
737262152Sluigi
738262152Sluigi	error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
739270252Sluigi	if (error) /* no device */
740262152Sluigi		goto unlock_exit;
741262152Sluigi
742262152Sluigi	if (na == NULL) { /* VALE prefix missing */
743262152Sluigi		error = EINVAL;
744262152Sluigi		goto unlock_exit;
745262152Sluigi	}
746262152Sluigi
747270252Sluigi	if (NETMAP_OWNED_BY_ANY(na)) {
748262152Sluigi		error = EBUSY;
749262152Sluigi		goto unref_exit;
750262152Sluigi	}
751262152Sluigi
752270252Sluigi	if (na->nm_bdg_ctl) {
753270252Sluigi		/* nop for VALE ports. The bwrap needs to put the hwna
754270252Sluigi		 * in netmap mode (see netmap_bwrap_bdg_ctl)
755270252Sluigi		 */
756270252Sluigi		error = na->nm_bdg_ctl(na, nmr, 1);
757270252Sluigi		if (error)
758270252Sluigi			goto unref_exit;
759270252Sluigi		ND("registered %s to netmap-mode", na->name);
760262152Sluigi	}
761262152Sluigi	NMG_UNLOCK();
762262152Sluigi	return 0;
763262152Sluigi
764262152Sluigiunref_exit:
765262152Sluigi	netmap_adapter_put(na);
766262152Sluigiunlock_exit:
767262152Sluigi	NMG_UNLOCK();
768262152Sluigi	return error;
769262152Sluigi}
770262152Sluigi
771262152Sluigi
772270252Sluigi/* process NETMAP_BDG_DETACH */
773262152Sluigistatic int
774270252Sluiginm_bdg_ctl_detach(struct nmreq *nmr)
775262152Sluigi{
776262152Sluigi	struct netmap_adapter *na;
777262152Sluigi	int error;
778262152Sluigi
779262152Sluigi	NMG_LOCK();
780262152Sluigi	error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
781262152Sluigi	if (error) { /* no device, or another bridge or user owns the device */
782262152Sluigi		goto unlock_exit;
783262152Sluigi	}
784262152Sluigi
785262152Sluigi	if (na == NULL) { /* VALE prefix missing */
786262152Sluigi		error = EINVAL;
787262152Sluigi		goto unlock_exit;
788262152Sluigi	}
789262152Sluigi
790270252Sluigi	if (na->nm_bdg_ctl) {
791270252Sluigi		/* remove the port from bridge. The bwrap
792270252Sluigi		 * also needs to put the hwna in normal mode
793270252Sluigi		 */
794270252Sluigi		error = na->nm_bdg_ctl(na, nmr, 0);
795262152Sluigi	}
796262152Sluigi
797262152Sluigi	netmap_adapter_put(na);
798262152Sluigiunlock_exit:
799262152Sluigi	NMG_UNLOCK();
800262152Sluigi	return error;
801262152Sluigi
802262152Sluigi}
803262152Sluigi
804262152Sluigi
805270252Sluigi/* Called by either user's context (netmap_ioctl())
806270252Sluigi * or external kernel modules (e.g., Openvswitch).
807270252Sluigi * Operation is indicated in nmr->nr_cmd.
808270252Sluigi * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge
809270252Sluigi * requires bdg_ops argument; the other commands ignore this argument.
810270252Sluigi *
811262152Sluigi * Called without NMG_LOCK.
812262152Sluigi */
813262152Sluigiint
814270252Sluiginetmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
815262152Sluigi{
816262152Sluigi	struct nm_bridge *b;
817262152Sluigi	struct netmap_adapter *na;
818262152Sluigi	struct netmap_vp_adapter *vpna;
819262152Sluigi	char *name = nmr->nr_name;
820262152Sluigi	int cmd = nmr->nr_cmd, namelen = strlen(name);
821262152Sluigi	int error = 0, i, j;
822262152Sluigi
823262152Sluigi	switch (cmd) {
824270252Sluigi	case NETMAP_BDG_NEWIF:
825270252Sluigi		error = nm_vi_create(nmr);
826270252Sluigi		break;
827270252Sluigi
828270252Sluigi	case NETMAP_BDG_DELIF:
829270252Sluigi		error = nm_vi_destroy(nmr->nr_name);
830270252Sluigi		break;
831270252Sluigi
832262152Sluigi	case NETMAP_BDG_ATTACH:
833270252Sluigi		error = nm_bdg_ctl_attach(nmr);
834262152Sluigi		break;
835262152Sluigi
836262152Sluigi	case NETMAP_BDG_DETACH:
837270252Sluigi		error = nm_bdg_ctl_detach(nmr);
838262152Sluigi		break;
839262152Sluigi
840262152Sluigi	case NETMAP_BDG_LIST:
841262152Sluigi		/* this is used to enumerate bridges and ports */
842262152Sluigi		if (namelen) { /* look up indexes of bridge and port */
843262152Sluigi			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
844262152Sluigi				error = EINVAL;
845262152Sluigi				break;
846262152Sluigi			}
847262152Sluigi			NMG_LOCK();
848262152Sluigi			b = nm_find_bridge(name, 0 /* don't create */);
849262152Sluigi			if (!b) {
850262152Sluigi				error = ENOENT;
851262152Sluigi				NMG_UNLOCK();
852262152Sluigi				break;
853262152Sluigi			}
854262152Sluigi
855270252Sluigi			name = name + b->bdg_namelen + 1;
856262152Sluigi			error = ENOENT;
857262152Sluigi			for (j = 0; j < b->bdg_active_ports; j++) {
858262152Sluigi				i = b->bdg_port_index[j];
859262152Sluigi				vpna = b->bdg_ports[i];
860262152Sluigi				if (vpna == NULL) {
861262152Sluigi					D("---AAAAAAAAARGH-------");
862262152Sluigi					continue;
863262152Sluigi				}
864262152Sluigi				/* the former and the latter identify a
865262152Sluigi				 * virtual port and a NIC, respectively
866262152Sluigi				 */
867270252Sluigi				if (!strcmp(vpna->up.name, name)) {
868262152Sluigi					/* bridge index */
869262152Sluigi					nmr->nr_arg1 = b - nm_bridges;
870262152Sluigi					nmr->nr_arg2 = i; /* port index */
871262152Sluigi					error = 0;
872262152Sluigi					break;
873262152Sluigi				}
874262152Sluigi			}
875262152Sluigi			NMG_UNLOCK();
876262152Sluigi		} else {
877262152Sluigi			/* return the first non-empty entry starting from
878262152Sluigi			 * bridge nr_arg1 and port nr_arg2.
879262152Sluigi			 *
880262152Sluigi			 * Users can detect the end of the same bridge by
881262152Sluigi			 * seeing the new and old value of nr_arg1, and can
882262152Sluigi			 * detect the end of all the bridge by error != 0
883262152Sluigi			 */
884262152Sluigi			i = nmr->nr_arg1;
885262152Sluigi			j = nmr->nr_arg2;
886262152Sluigi
887262152Sluigi			NMG_LOCK();
888262152Sluigi			for (error = ENOENT; i < NM_BRIDGES; i++) {
889262152Sluigi				b = nm_bridges + i;
890262152Sluigi				if (j >= b->bdg_active_ports) {
891262152Sluigi					j = 0; /* following bridges scan from 0 */
892262152Sluigi					continue;
893262152Sluigi				}
894262152Sluigi				nmr->nr_arg1 = i;
895262152Sluigi				nmr->nr_arg2 = j;
896262152Sluigi				j = b->bdg_port_index[j];
897262152Sluigi				vpna = b->bdg_ports[j];
898270252Sluigi				strncpy(name, vpna->up.name, (size_t)IFNAMSIZ);
899262152Sluigi				error = 0;
900262152Sluigi				break;
901262152Sluigi			}
902262152Sluigi			NMG_UNLOCK();
903262152Sluigi		}
904262152Sluigi		break;
905262152Sluigi
906270252Sluigi	case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */
907270252Sluigi		/* register callbacks to the given bridge.
908262152Sluigi		 * nmr->nr_name may be just bridge's name (including ':'
909262152Sluigi		 * if it is not just NM_NAME).
910262152Sluigi		 */
911270252Sluigi		if (!bdg_ops) {
912262152Sluigi			error = EINVAL;
913262152Sluigi			break;
914262152Sluigi		}
915262152Sluigi		NMG_LOCK();
916262152Sluigi		b = nm_find_bridge(name, 0 /* don't create */);
917262152Sluigi		if (!b) {
918262152Sluigi			error = EINVAL;
919262152Sluigi		} else {
920270252Sluigi			b->bdg_ops = *bdg_ops;
921262152Sluigi		}
922262152Sluigi		NMG_UNLOCK();
923262152Sluigi		break;
924262152Sluigi
925262152Sluigi	case NETMAP_BDG_VNET_HDR:
926262152Sluigi		/* Valid lengths for the virtio-net header are 0 (no header),
927262152Sluigi		   10 and 12. */
928262152Sluigi		if (nmr->nr_arg1 != 0 &&
929262152Sluigi			nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) &&
930262152Sluigi				nmr->nr_arg1 != 12) {
931262152Sluigi			error = EINVAL;
932262152Sluigi			break;
933262152Sluigi		}
934262152Sluigi		NMG_LOCK();
935262152Sluigi		error = netmap_get_bdg_na(nmr, &na, 0);
936262152Sluigi		if (na && !error) {
937262152Sluigi			vpna = (struct netmap_vp_adapter *)na;
938262152Sluigi			vpna->virt_hdr_len = nmr->nr_arg1;
939262152Sluigi			if (vpna->virt_hdr_len)
940270252Sluigi				vpna->mfs = NETMAP_BUF_SIZE(na);
941262152Sluigi			D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna);
942262152Sluigi			netmap_adapter_put(na);
943262152Sluigi		}
944262152Sluigi		NMG_UNLOCK();
945262152Sluigi		break;
946262152Sluigi
947262152Sluigi	default:
948262152Sluigi		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
949262152Sluigi		error = EINVAL;
950262152Sluigi		break;
951262152Sluigi	}
952262152Sluigi	return error;
953262152Sluigi}
954262152Sluigi
955270252Sluigiint
956270252Sluiginetmap_bdg_config(struct nmreq *nmr)
957270252Sluigi{
958270252Sluigi	struct nm_bridge *b;
959270252Sluigi	int error = EINVAL;
960270252Sluigi
961270252Sluigi	NMG_LOCK();
962270252Sluigi	b = nm_find_bridge(nmr->nr_name, 0);
963270252Sluigi	if (!b) {
964270252Sluigi		NMG_UNLOCK();
965270252Sluigi		return error;
966270252Sluigi	}
967270252Sluigi	NMG_UNLOCK();
968270252Sluigi	/* Don't call config() with NMG_LOCK() held */
969270252Sluigi	BDG_RLOCK(b);
970270252Sluigi	if (b->bdg_ops.config != NULL)
971270252Sluigi		error = b->bdg_ops.config((struct nm_ifreq *)nmr);
972270252Sluigi	BDG_RUNLOCK(b);
973270252Sluigi	return error;
974270252Sluigi}
975270252Sluigi
976270252Sluigi
977270252Sluigi/* nm_krings_create callback for VALE ports.
978270252Sluigi * Calls the standard netmap_krings_create, then adds leases on rx
979270252Sluigi * rings and bdgfwd on tx rings.
980270252Sluigi */
981262152Sluigistatic int
982262152Sluiginetmap_vp_krings_create(struct netmap_adapter *na)
983262152Sluigi{
984262152Sluigi	u_int tailroom;
985262152Sluigi	int error, i;
986262152Sluigi	uint32_t *leases;
987262152Sluigi	u_int nrx = netmap_real_rx_rings(na);
988262152Sluigi
989262152Sluigi	/*
990262152Sluigi	 * Leases are attached to RX rings on vale ports
991262152Sluigi	 */
992262152Sluigi	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
993262152Sluigi
994262152Sluigi	error = netmap_krings_create(na, tailroom);
995262152Sluigi	if (error)
996262152Sluigi		return error;
997262152Sluigi
998262152Sluigi	leases = na->tailroom;
999262152Sluigi
1000262152Sluigi	for (i = 0; i < nrx; i++) { /* Receive rings */
1001262152Sluigi		na->rx_rings[i].nkr_leases = leases;
1002262152Sluigi		leases += na->num_rx_desc;
1003262152Sluigi	}
1004262152Sluigi
1005262152Sluigi	error = nm_alloc_bdgfwd(na);
1006262152Sluigi	if (error) {
1007262152Sluigi		netmap_krings_delete(na);
1008262152Sluigi		return error;
1009262152Sluigi	}
1010262152Sluigi
1011262152Sluigi	return 0;
1012262152Sluigi}
1013262152Sluigi
1014262152Sluigi
/*
 * nm_krings_delete callback for VALE ports: releases the forwarding
 * work area first, then the krings themselves.
 */
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);
	netmap_krings_delete(na);
}
1022262152Sluigi
1023262152Sluigi
1024262152Sluigistatic int
1025262152Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
1026262152Sluigi	struct netmap_vp_adapter *na, u_int ring_nr);
1027262152Sluigi
1028262152Sluigi
1029262152Sluigi/*
1030270252Sluigi * main dispatch routine for the bridge.
1031262152Sluigi * Grab packets from a kring, move them into the ft structure
1032262152Sluigi * associated to the tx (input) port. Max one instance per port,
1033262152Sluigi * filtered on input (ioctl, poll or XXX).
1034262152Sluigi * Returns the next position in the ring.
1035262152Sluigi */
1036262152Sluigistatic int
1037270252Sluiginm_bdg_preflush(struct netmap_kring *kring, u_int end)
1038262152Sluigi{
1039270252Sluigi	struct netmap_vp_adapter *na =
1040270252Sluigi		(struct netmap_vp_adapter*)kring->na;
1041262152Sluigi	struct netmap_ring *ring = kring->ring;
1042262152Sluigi	struct nm_bdg_fwd *ft;
1043270252Sluigi	u_int ring_nr = kring->ring_id;
1044262152Sluigi	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
1045262152Sluigi	u_int ft_i = 0;	/* start from 0 */
1046262152Sluigi	u_int frags = 1; /* how many frags ? */
1047262152Sluigi	struct nm_bridge *b = na->na_bdg;
1048262152Sluigi
1049262152Sluigi	/* To protect against modifications to the bridge we acquire a
1050262152Sluigi	 * shared lock, waiting if we can sleep (if the source port is
1051262152Sluigi	 * attached to a user process) or with a trylock otherwise (NICs).
1052262152Sluigi	 */
1053262152Sluigi	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1054262152Sluigi	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
1055262152Sluigi		BDG_RLOCK(b);
1056262152Sluigi	else if (!BDG_RTRYLOCK(b))
1057262152Sluigi		return 0;
1058262152Sluigi	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1059262152Sluigi	ft = kring->nkr_ft;
1060262152Sluigi
1061262152Sluigi	for (; likely(j != end); j = nm_next(j, lim)) {
1062262152Sluigi		struct netmap_slot *slot = &ring->slot[j];
1063262152Sluigi		char *buf;
1064262152Sluigi
1065262152Sluigi		ft[ft_i].ft_len = slot->len;
1066262152Sluigi		ft[ft_i].ft_flags = slot->flags;
1067262152Sluigi
1068262152Sluigi		ND("flags is 0x%x", slot->flags);
1069262152Sluigi		/* this slot goes into a list so initialize the link field */
1070262152Sluigi		ft[ft_i].ft_next = NM_FT_NULL;
1071262152Sluigi		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
1072270252Sluigi			(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
1073267282Sluigi		if (unlikely(buf == NULL)) {
1074267282Sluigi			RD(5, "NULL %s buffer pointer from %s slot %d len %d",
1075267282Sluigi				(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
1076267282Sluigi				kring->name, j, ft[ft_i].ft_len);
1077270252Sluigi			buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
1078267282Sluigi			ft[ft_i].ft_len = 0;
1079267282Sluigi			ft[ft_i].ft_flags = 0;
1080267282Sluigi		}
1081262152Sluigi		__builtin_prefetch(buf);
1082262152Sluigi		++ft_i;
1083262152Sluigi		if (slot->flags & NS_MOREFRAG) {
1084262152Sluigi			frags++;
1085262152Sluigi			continue;
1086262152Sluigi		}
1087262152Sluigi		if (unlikely(netmap_verbose && frags > 1))
1088262152Sluigi			RD(5, "%d frags at %d", frags, ft_i - frags);
1089262152Sluigi		ft[ft_i - frags].ft_frags = frags;
1090262152Sluigi		frags = 1;
1091262152Sluigi		if (unlikely((int)ft_i >= bridge_batch))
1092262152Sluigi			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1093262152Sluigi	}
1094262152Sluigi	if (frags > 1) {
1095262152Sluigi		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1096262152Sluigi		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
1097262152Sluigi		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
1098262152Sluigi		ft[ft_i - frags].ft_frags = frags - 1;
1099262152Sluigi	}
1100262152Sluigi	if (ft_i)
1101262152Sluigi		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1102262152Sluigi	BDG_RUNLOCK(b);
1103262152Sluigi	return j;
1104262152Sluigi}
1105262152Sluigi
1106262152Sluigi
1107262152Sluigi/* ----- FreeBSD if_bridge hash function ------- */
1108262152Sluigi
1109262152Sluigi/*
1110262152Sluigi * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1111262152Sluigi * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1112262152Sluigi *
1113262152Sluigi * http://www.burtleburtle.net/bob/hash/spooky.html
1114262152Sluigi */
1115262152Sluigi#define mix(a, b, c)                                                    \
1116262152Sluigido {                                                                    \
1117262152Sluigi        a -= b; a -= c; a ^= (c >> 13);                                 \
1118262152Sluigi        b -= c; b -= a; b ^= (a << 8);                                  \
1119262152Sluigi        c -= a; c -= b; c ^= (b >> 13);                                 \
1120262152Sluigi        a -= b; a -= c; a ^= (c >> 12);                                 \
1121262152Sluigi        b -= c; b -= a; b ^= (a << 16);                                 \
1122262152Sluigi        c -= a; c -= b; c ^= (b >> 5);                                  \
1123262152Sluigi        a -= b; a -= c; a ^= (c >> 3);                                  \
1124262152Sluigi        b -= c; b -= a; b ^= (a << 10);                                 \
1125262152Sluigi        c -= a; c -= b; c ^= (b >> 15);                                 \
1126262152Sluigi} while (/*CONSTCOND*/0)
1127262152Sluigi
1128262152Sluigi
1129262152Sluigistatic __inline uint32_t
1130262152Sluiginm_bridge_rthash(const uint8_t *addr)
1131262152Sluigi{
1132262152Sluigi        uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
1133262152Sluigi
1134262152Sluigi        b += addr[5] << 8;
1135262152Sluigi        b += addr[4];
1136262152Sluigi        a += addr[3] << 24;
1137262152Sluigi        a += addr[2] << 16;
1138262152Sluigi        a += addr[1] << 8;
1139262152Sluigi        a += addr[0];
1140262152Sluigi
1141262152Sluigi        mix(a, b, c);
1142262152Sluigi#define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
1143262152Sluigi        return (c & BRIDGE_RTHASH_MASK);
1144262152Sluigi}
1145262152Sluigi
1146262152Sluigi#undef mix
1147262152Sluigi
1148262152Sluigi
1149270252Sluigi/* nm_register callback for VALE ports */
1150262152Sluigistatic int
1151270252Sluiginetmap_vp_reg(struct netmap_adapter *na, int onoff)
1152262152Sluigi{
1153262152Sluigi	struct netmap_vp_adapter *vpna =
1154262152Sluigi		(struct netmap_vp_adapter*)na;
1155262152Sluigi
1156270252Sluigi	/* persistent ports may be put in netmap mode
1157270252Sluigi	 * before being attached to a bridge
1158262152Sluigi	 */
1159270252Sluigi	if (vpna->na_bdg)
1160270252Sluigi		BDG_WLOCK(vpna->na_bdg);
1161262152Sluigi	if (onoff) {
1162270252Sluigi		na->na_flags |= NAF_NETMAP_ON;
1163270252Sluigi		 /* XXX on FreeBSD, persistent VALE ports should also
1164270252Sluigi		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
1165270252Sluigi		 */
1166262152Sluigi	} else {
1167270252Sluigi		na->na_flags &= ~NAF_NETMAP_ON;
1168262152Sluigi	}
1169270252Sluigi	if (vpna->na_bdg)
1170270252Sluigi		BDG_WUNLOCK(vpna->na_bdg);
1171262152Sluigi	return 0;
1172262152Sluigi}
1173262152Sluigi
1174262152Sluigi
1175262152Sluigi/*
1176262152Sluigi * Lookup function for a learning bridge.
1177262152Sluigi * Update the hash table with the source address,
1178262152Sluigi * and then returns the destination port index, and the
1179262152Sluigi * ring in *dst_ring (at the moment, always use ring 0)
1180262152Sluigi */
1181262152Sluigiu_int
1182270252Sluiginetmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
1183270252Sluigi		const struct netmap_vp_adapter *na)
1184262152Sluigi{
1185270252Sluigi	uint8_t *buf = ft->ft_buf;
1186270252Sluigi	u_int buf_len = ft->ft_len;
1187262152Sluigi	struct nm_hash_ent *ht = na->na_bdg->ht;
1188262152Sluigi	uint32_t sh, dh;
1189262152Sluigi	u_int dst, mysrc = na->bdg_port;
1190262152Sluigi	uint64_t smac, dmac;
1191262152Sluigi
1192270252Sluigi	/* safety check, unfortunately we have many cases */
1193270252Sluigi	if (buf_len >= 14 + na->virt_hdr_len) {
1194270252Sluigi		/* virthdr + mac_hdr in the same slot */
1195270252Sluigi		buf += na->virt_hdr_len;
1196270252Sluigi		buf_len -= na->virt_hdr_len;
1197270252Sluigi	} else if (buf_len == na->virt_hdr_len && ft->ft_flags & NS_MOREFRAG) {
1198270252Sluigi		/* only header in first fragment */
1199270252Sluigi		ft++;
1200270252Sluigi		buf = ft->ft_buf;
1201270252Sluigi		buf_len = ft->ft_len;
1202270252Sluigi	} else {
1203270252Sluigi		RD(5, "invalid buf format, length %d", buf_len);
1204262152Sluigi		return NM_BDG_NOPORT;
1205262152Sluigi	}
1206262152Sluigi	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1207262152Sluigi	smac = le64toh(*(uint64_t *)(buf + 4));
1208262152Sluigi	smac >>= 16;
1209262152Sluigi
1210262152Sluigi	/*
1211262152Sluigi	 * The hash is somewhat expensive, there might be some
1212262152Sluigi	 * worthwhile optimizations here.
1213262152Sluigi	 */
1214262152Sluigi	if ((buf[6] & 1) == 0) { /* valid src */
1215262152Sluigi		uint8_t *s = buf+6;
1216262152Sluigi		sh = nm_bridge_rthash(s); // XXX hash of source
1217262152Sluigi		/* update source port forwarding entry */
1218262152Sluigi		ht[sh].mac = smac;	/* XXX expire ? */
1219262152Sluigi		ht[sh].ports = mysrc;
1220262152Sluigi		if (netmap_verbose)
1221262152Sluigi		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1222262152Sluigi			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1223262152Sluigi	}
1224262152Sluigi	dst = NM_BDG_BROADCAST;
1225262152Sluigi	if ((buf[0] & 1) == 0) { /* unicast */
1226262152Sluigi		dh = nm_bridge_rthash(buf); // XXX hash of dst
1227262152Sluigi		if (ht[dh].mac == dmac) {	/* found dst */
1228262152Sluigi			dst = ht[dh].ports;
1229262152Sluigi		}
1230262152Sluigi		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1231262152Sluigi	}
1232262152Sluigi	*dst_ring = 0;
1233262152Sluigi	return dst;
1234262152Sluigi}
1235262152Sluigi
1236262152Sluigi
1237262152Sluigi/*
1238262152Sluigi * Available space in the ring. Only used in VALE code
1239262152Sluigi * and only with is_rx = 1
1240262152Sluigi */
1241262152Sluigistatic inline uint32_t
1242262152Sluiginm_kr_space(struct netmap_kring *k, int is_rx)
1243262152Sluigi{
1244262152Sluigi	int space;
1245262152Sluigi
1246262152Sluigi	if (is_rx) {
1247262152Sluigi		int busy = k->nkr_hwlease - k->nr_hwcur;
1248262152Sluigi		if (busy < 0)
1249262152Sluigi			busy += k->nkr_num_slots;
1250262152Sluigi		space = k->nkr_num_slots - 1 - busy;
1251262152Sluigi	} else {
1252262152Sluigi		/* XXX never used in this branch */
1253262152Sluigi		space = k->nr_hwtail - k->nkr_hwlease;
1254262152Sluigi		if (space < 0)
1255262152Sluigi			space += k->nkr_num_slots;
1256262152Sluigi	}
1257262152Sluigi#if 0
1258262152Sluigi	// sanity check
1259262152Sluigi	if (k->nkr_hwlease >= k->nkr_num_slots ||
1260262152Sluigi		k->nr_hwcur >= k->nkr_num_slots ||
1261262152Sluigi		k->nr_tail >= k->nkr_num_slots ||
1262262152Sluigi		busy < 0 ||
1263262152Sluigi		busy >= k->nkr_num_slots) {
1264262152Sluigi		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
1265262152Sluigi			k->nkr_lease_idx, k->nkr_num_slots);
1266262152Sluigi	}
1267262152Sluigi#endif
1268262152Sluigi	return space;
1269262152Sluigi}
1270262152Sluigi
1271262152Sluigi
1272262152Sluigi
1273262152Sluigi
1274262152Sluigi/* make a lease on the kring for N positions. return the
1275262152Sluigi * lease index
1276262152Sluigi * XXX only used in VALE code and with is_rx = 1
1277262152Sluigi */
1278262152Sluigistatic inline uint32_t
1279262152Sluiginm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
1280262152Sluigi{
1281262152Sluigi	uint32_t lim = k->nkr_num_slots - 1;
1282262152Sluigi	uint32_t lease_idx = k->nkr_lease_idx;
1283262152Sluigi
1284262152Sluigi	k->nkr_leases[lease_idx] = NR_NOSLOT;
1285262152Sluigi	k->nkr_lease_idx = nm_next(lease_idx, lim);
1286262152Sluigi
1287262152Sluigi	if (n > nm_kr_space(k, is_rx)) {
1288262152Sluigi		D("invalid request for %d slots", n);
1289262152Sluigi		panic("x");
1290262152Sluigi	}
1291262152Sluigi	/* XXX verify that there are n slots */
1292262152Sluigi	k->nkr_hwlease += n;
1293262152Sluigi	if (k->nkr_hwlease > lim)
1294262152Sluigi		k->nkr_hwlease -= lim + 1;
1295262152Sluigi
1296262152Sluigi	if (k->nkr_hwlease >= k->nkr_num_slots ||
1297262152Sluigi		k->nr_hwcur >= k->nkr_num_slots ||
1298262152Sluigi		k->nr_hwtail >= k->nkr_num_slots ||
1299262152Sluigi		k->nkr_lease_idx >= k->nkr_num_slots) {
1300262152Sluigi		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
1301270252Sluigi			k->na->name,
1302262152Sluigi			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
1303262152Sluigi			k->nkr_lease_idx, k->nkr_num_slots);
1304262152Sluigi	}
1305262152Sluigi	return lease_idx;
1306262152Sluigi}
1307262152Sluigi
1308262152Sluigi/*
1309270252Sluigi *
1310262152Sluigi * This flush routine supports only unicast and broadcast but a large
1311262152Sluigi * number of ports, and lets us replace the learn and dispatch functions.
1312262152Sluigi */
1313262152Sluigiint
1314262152Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1315262152Sluigi		u_int ring_nr)
1316262152Sluigi{
1317262152Sluigi	struct nm_bdg_q *dst_ents, *brddst;
1318262152Sluigi	uint16_t num_dsts = 0, *dsts;
1319262152Sluigi	struct nm_bridge *b = na->na_bdg;
1320262152Sluigi	u_int i, j, me = na->bdg_port;
1321262152Sluigi
1322262152Sluigi	/*
1323262152Sluigi	 * The work area (pointed by ft) is followed by an array of
1324262152Sluigi	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1325262152Sluigi	 * queues per port plus one for the broadcast traffic.
1326262152Sluigi	 * Then we have an array of destination indexes.
1327262152Sluigi	 */
1328262152Sluigi	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1329262152Sluigi	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1330262152Sluigi
1331262152Sluigi	/* first pass: find a destination for each packet in the batch */
1332262152Sluigi	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1333262152Sluigi		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1334262152Sluigi		uint16_t dst_port, d_i;
1335262152Sluigi		struct nm_bdg_q *d;
1336262152Sluigi
1337262152Sluigi		ND("slot %d frags %d", i, ft[i].ft_frags);
1338262152Sluigi		/* Drop the packet if the virtio-net header is not into the first
1339262152Sluigi		   fragment nor at the very beginning of the second. */
1340270252Sluigi		if (unlikely(na->virt_hdr_len > ft[i].ft_len))
1341262152Sluigi			continue;
1342270252Sluigi		dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
1343262152Sluigi		if (netmap_verbose > 255)
1344262152Sluigi			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1345262152Sluigi		if (dst_port == NM_BDG_NOPORT)
1346262152Sluigi			continue; /* this packet is identified to be dropped */
1347262152Sluigi		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1348262152Sluigi			continue;
1349262152Sluigi		else if (dst_port == NM_BDG_BROADCAST)
1350262152Sluigi			dst_ring = 0; /* broadcasts always go to ring 0 */
1351262152Sluigi		else if (unlikely(dst_port == me ||
1352262152Sluigi		    !b->bdg_ports[dst_port]))
1353262152Sluigi			continue;
1354262152Sluigi
1355262152Sluigi		/* get a position in the scratch pad */
1356262152Sluigi		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1357262152Sluigi		d = dst_ents + d_i;
1358262152Sluigi
1359262152Sluigi		/* append the first fragment to the list */
1360262152Sluigi		if (d->bq_head == NM_FT_NULL) { /* new destination */
1361262152Sluigi			d->bq_head = d->bq_tail = i;
1362262152Sluigi			/* remember this position to be scanned later */
1363262152Sluigi			if (dst_port != NM_BDG_BROADCAST)
1364262152Sluigi				dsts[num_dsts++] = d_i;
1365262152Sluigi		} else {
1366262152Sluigi			ft[d->bq_tail].ft_next = i;
1367262152Sluigi			d->bq_tail = i;
1368262152Sluigi		}
1369262152Sluigi		d->bq_len += ft[i].ft_frags;
1370262152Sluigi	}
1371262152Sluigi
1372262152Sluigi	/*
1373262152Sluigi	 * Broadcast traffic goes to ring 0 on all destinations.
1374262152Sluigi	 * So we need to add these rings to the list of ports to scan.
1375262152Sluigi	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1376262152Sluigi	 * expensive. We should keep a compact list of active destinations
1377262152Sluigi	 * so we could shorten this loop.
1378262152Sluigi	 */
1379262152Sluigi	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1380262152Sluigi	if (brddst->bq_head != NM_FT_NULL) {
1381262152Sluigi		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1382262152Sluigi			uint16_t d_i;
1383262152Sluigi			i = b->bdg_port_index[j];
1384262152Sluigi			if (unlikely(i == me))
1385262152Sluigi				continue;
1386262152Sluigi			d_i = i * NM_BDG_MAXRINGS;
1387262152Sluigi			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1388262152Sluigi				dsts[num_dsts++] = d_i;
1389262152Sluigi		}
1390262152Sluigi	}
1391262152Sluigi
1392262152Sluigi	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1393270252Sluigi	/* second pass: scan destinations */
1394262152Sluigi	for (i = 0; i < num_dsts; i++) {
1395262152Sluigi		struct netmap_vp_adapter *dst_na;
1396262152Sluigi		struct netmap_kring *kring;
1397262152Sluigi		struct netmap_ring *ring;
1398262152Sluigi		u_int dst_nr, lim, j, d_i, next, brd_next;
1399262152Sluigi		u_int needed, howmany;
1400262152Sluigi		int retry = netmap_txsync_retry;
1401262152Sluigi		struct nm_bdg_q *d;
1402262152Sluigi		uint32_t my_start = 0, lease_idx = 0;
1403262152Sluigi		int nrings;
1404262152Sluigi		int virt_hdr_mismatch = 0;
1405262152Sluigi
1406262152Sluigi		d_i = dsts[i];
1407262152Sluigi		ND("second pass %d port %d", i, d_i);
1408262152Sluigi		d = dst_ents + d_i;
1409262152Sluigi		// XXX fix the division
1410262152Sluigi		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1411262152Sluigi		/* protect from the lookup function returning an inactive
1412262152Sluigi		 * destination port
1413262152Sluigi		 */
1414262152Sluigi		if (unlikely(dst_na == NULL))
1415262152Sluigi			goto cleanup;
1416262152Sluigi		if (dst_na->up.na_flags & NAF_SW_ONLY)
1417262152Sluigi			goto cleanup;
1418262152Sluigi		/*
1419262152Sluigi		 * The interface may be in !netmap mode in two cases:
1420262152Sluigi		 * - when na is attached but not activated yet;
1421262152Sluigi		 * - when na is being deactivated but is still attached.
1422262152Sluigi		 */
1423270252Sluigi		if (unlikely(!nm_netmap_on(&dst_na->up))) {
1424262152Sluigi			ND("not in netmap mode!");
1425262152Sluigi			goto cleanup;
1426262152Sluigi		}
1427262152Sluigi
1428262152Sluigi		/* there is at least one either unicast or broadcast packet */
1429262152Sluigi		brd_next = brddst->bq_head;
1430262152Sluigi		next = d->bq_head;
1431262152Sluigi		/* we need to reserve this many slots. If fewer are
1432262152Sluigi		 * available, some packets will be dropped.
1433262152Sluigi		 * Packets may have multiple fragments, so we may not use
1434262152Sluigi		 * there is a chance that we may not use all of the slots
1435262152Sluigi		 * we have claimed, so we will need to handle the leftover
1436262152Sluigi		 * ones when we regain the lock.
1437262152Sluigi		 */
1438262152Sluigi		needed = d->bq_len + brddst->bq_len;
1439262152Sluigi
1440262152Sluigi		if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
1441270252Sluigi			RD(3, "virt_hdr_mismatch, src %d dst %d", na->virt_hdr_len, dst_na->virt_hdr_len);
1442262152Sluigi			/* There is a virtio-net header/offloadings mismatch between
1443262152Sluigi			 * source and destination. The slower mismatch datapath will
1444262152Sluigi			 * be used to cope with all the mismatches.
1445262152Sluigi			 */
1446262152Sluigi			virt_hdr_mismatch = 1;
1447262152Sluigi			if (dst_na->mfs < na->mfs) {
1448262152Sluigi				/* We may need to do segmentation offloadings, and so
1449262152Sluigi				 * we may need a number of destination slots greater
1450262152Sluigi				 * than the number of input slots ('needed').
1451262152Sluigi				 * We look for the smallest integer 'x' which satisfies:
1452262152Sluigi				 *	needed * na->mfs + x * H <= x * na->mfs
1453262152Sluigi				 * where 'H' is the length of the longest header that may
1454262152Sluigi				 * be replicated in the segmentation process (e.g. for
1455262152Sluigi				 * TCPv4 we must account for ethernet header, IP header
1456262152Sluigi				 * and TCPv4 header).
1457262152Sluigi				 */
1458262152Sluigi				needed = (needed * na->mfs) /
1459262152Sluigi						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
1460262152Sluigi				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
1461262152Sluigi			}
1462262152Sluigi		}
1463262152Sluigi
1464262152Sluigi		ND(5, "pass 2 dst %d is %x %s",
1465262152Sluigi			i, d_i, is_vp ? "virtual" : "nic/host");
1466262152Sluigi		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1467262152Sluigi		nrings = dst_na->up.num_rx_rings;
1468262152Sluigi		if (dst_nr >= nrings)
1469262152Sluigi			dst_nr = dst_nr % nrings;
1470262152Sluigi		kring = &dst_na->up.rx_rings[dst_nr];
1471262152Sluigi		ring = kring->ring;
1472262152Sluigi		lim = kring->nkr_num_slots - 1;
1473262152Sluigi
1474262152Sluigiretry:
1475262152Sluigi
1476262152Sluigi		if (dst_na->retry && retry) {
1477262152Sluigi			/* try to get some free slot from the previous run */
1478262152Sluigi			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1479270252Sluigi			/* actually useful only for bwraps, since there
1480270252Sluigi			 * the notify will trigger a txsync on the hwna. VALE ports
1481270252Sluigi			 * have dst_na->retry == 0
1482270252Sluigi			 */
1483262152Sluigi		}
1484262152Sluigi		/* reserve the buffers in the queue and an entry
1485262152Sluigi		 * to report completion, and drop lock.
1486262152Sluigi		 * XXX this might become a helper function.
1487262152Sluigi		 */
1488262152Sluigi		mtx_lock(&kring->q_lock);
1489262152Sluigi		if (kring->nkr_stopped) {
1490262152Sluigi			mtx_unlock(&kring->q_lock);
1491262152Sluigi			goto cleanup;
1492262152Sluigi		}
1493262152Sluigi		my_start = j = kring->nkr_hwlease;
1494262152Sluigi		howmany = nm_kr_space(kring, 1);
1495262152Sluigi		if (needed < howmany)
1496262152Sluigi			howmany = needed;
1497262152Sluigi		lease_idx = nm_kr_lease(kring, howmany, 1);
1498262152Sluigi		mtx_unlock(&kring->q_lock);
1499262152Sluigi
1500262152Sluigi		/* only retry if we need more than available slots */
1501262152Sluigi		if (retry && needed <= howmany)
1502262152Sluigi			retry = 0;
1503262152Sluigi
1504262152Sluigi		/* copy to the destination queue */
1505262152Sluigi		while (howmany > 0) {
1506262152Sluigi			struct netmap_slot *slot;
1507262152Sluigi			struct nm_bdg_fwd *ft_p, *ft_end;
1508262152Sluigi			u_int cnt;
1509262152Sluigi
1510262152Sluigi			/* find the queue from which we pick next packet.
1511262152Sluigi			 * NM_FT_NULL is always higher than valid indexes
1512262152Sluigi			 * so we never dereference it if the other list
1513262152Sluigi			 * has packets (and if both are empty we never
1514262152Sluigi			 * get here).
1515262152Sluigi			 */
1516262152Sluigi			if (next < brd_next) {
1517262152Sluigi				ft_p = ft + next;
1518262152Sluigi				next = ft_p->ft_next;
1519262152Sluigi			} else { /* insert broadcast */
1520262152Sluigi				ft_p = ft + brd_next;
1521262152Sluigi				brd_next = ft_p->ft_next;
1522262152Sluigi			}
1523262152Sluigi			cnt = ft_p->ft_frags; // cnt > 0
1524262152Sluigi			if (unlikely(cnt > howmany))
1525262152Sluigi			    break; /* no more space */
1526262152Sluigi			if (netmap_verbose && cnt > 1)
1527262152Sluigi				RD(5, "rx %d frags to %d", cnt, j);
1528262152Sluigi			ft_end = ft_p + cnt;
1529262152Sluigi			if (unlikely(virt_hdr_mismatch)) {
1530262152Sluigi				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
1531262152Sluigi			} else {
1532262152Sluigi				howmany -= cnt;
1533262152Sluigi				do {
1534262152Sluigi					char *dst, *src = ft_p->ft_buf;
1535262152Sluigi					size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1536262152Sluigi
1537262152Sluigi					slot = &ring->slot[j];
1538270252Sluigi					dst = NMB(&dst_na->up, slot);
1539262152Sluigi
1540262152Sluigi					ND("send [%d] %d(%d) bytes at %s:%d",
1541262152Sluigi							i, (int)copy_len, (int)dst_len,
1542262152Sluigi							NM_IFPNAME(dst_ifp), j);
1543262152Sluigi					/* round to a multiple of 64 */
1544262152Sluigi					copy_len = (copy_len + 63) & ~63;
1545262152Sluigi
1546270252Sluigi					if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
1547270252Sluigi						     copy_len > NETMAP_BUF_SIZE(&na->up))) {
1548267282Sluigi						RD(5, "invalid len %d, down to 64", (int)copy_len);
1549267282Sluigi						copy_len = dst_len = 64; // XXX
1550267282Sluigi					}
1551262152Sluigi					if (ft_p->ft_flags & NS_INDIRECT) {
1552262152Sluigi						if (copyin(src, dst, copy_len)) {
1553262152Sluigi							// invalid user pointer, pretend len is 0
1554262152Sluigi							dst_len = 0;
1555262152Sluigi						}
1556262152Sluigi					} else {
1557262152Sluigi						//memcpy(dst, src, copy_len);
1558262152Sluigi						pkt_copy(src, dst, (int)copy_len);
1559262152Sluigi					}
1560262152Sluigi					slot->len = dst_len;
1561262152Sluigi					slot->flags = (cnt << 8)| NS_MOREFRAG;
1562262152Sluigi					j = nm_next(j, lim);
1563262152Sluigi					needed--;
1564262152Sluigi					ft_p++;
1565262152Sluigi				} while (ft_p != ft_end);
1566262152Sluigi				slot->flags = (cnt << 8); /* clear flag on last entry */
1567262152Sluigi			}
1568262152Sluigi			/* are we done ? */
1569262152Sluigi			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1570262152Sluigi				break;
1571262152Sluigi		}
1572262152Sluigi		{
1573262152Sluigi		    /* current position */
1574262152Sluigi		    uint32_t *p = kring->nkr_leases; /* shorthand */
1575262152Sluigi		    uint32_t update_pos;
1576262152Sluigi		    int still_locked = 1;
1577262152Sluigi
1578262152Sluigi		    mtx_lock(&kring->q_lock);
1579262152Sluigi		    if (unlikely(howmany > 0)) {
1580262152Sluigi			/* not used all bufs. If i am the last one
1581262152Sluigi			 * i can recover the slots, otherwise must
1582262152Sluigi			 * fill them with 0 to mark empty packets.
1583262152Sluigi			 */
1584262152Sluigi			ND("leftover %d bufs", howmany);
1585262152Sluigi			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1586262152Sluigi			    /* yes i am the last one */
1587262152Sluigi			    ND("roll back nkr_hwlease to %d", j);
1588262152Sluigi			    kring->nkr_hwlease = j;
1589262152Sluigi			} else {
1590262152Sluigi			    while (howmany-- > 0) {
1591262152Sluigi				ring->slot[j].len = 0;
1592262152Sluigi				ring->slot[j].flags = 0;
1593262152Sluigi				j = nm_next(j, lim);
1594262152Sluigi			    }
1595262152Sluigi			}
1596262152Sluigi		    }
1597262152Sluigi		    p[lease_idx] = j; /* report I am done */
1598262152Sluigi
1599262152Sluigi		    update_pos = kring->nr_hwtail;
1600262152Sluigi
1601262152Sluigi		    if (my_start == update_pos) {
1602262152Sluigi			/* all slots before my_start have been reported,
1603262152Sluigi			 * so scan subsequent leases to see if other ranges
1604262152Sluigi			 * have been completed, and to a selwakeup or txsync.
1605262152Sluigi		         */
1606262152Sluigi			while (lease_idx != kring->nkr_lease_idx &&
1607262152Sluigi				p[lease_idx] != NR_NOSLOT) {
1608262152Sluigi			    j = p[lease_idx];
1609262152Sluigi			    p[lease_idx] = NR_NOSLOT;
1610262152Sluigi			    lease_idx = nm_next(lease_idx, lim);
1611262152Sluigi			}
1612262152Sluigi			/* j is the new 'write' position. j != my_start
1613262152Sluigi			 * means there are new buffers to report
1614262152Sluigi			 */
1615262152Sluigi			if (likely(j != my_start)) {
1616262152Sluigi				kring->nr_hwtail = j;
1617262152Sluigi				still_locked = 0;
1618262152Sluigi				mtx_unlock(&kring->q_lock);
1619262152Sluigi				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1620270252Sluigi				/* this is netmap_notify for VALE ports and
1621270252Sluigi				 * netmap_bwrap_notify for bwrap. The latter will
1622270252Sluigi				 * trigger a txsync on the underlying hwna
1623270252Sluigi				 */
1624270252Sluigi				if (dst_na->retry && retry--) {
1625270252Sluigi					/* XXX this is going to call nm_notify again.
1626270252Sluigi					 * Only useful for bwrap in virtual machines
1627270252Sluigi					 */
1628262152Sluigi					goto retry;
1629270252Sluigi				}
1630262152Sluigi			}
1631262152Sluigi		    }
1632262152Sluigi		    if (still_locked)
1633262152Sluigi			mtx_unlock(&kring->q_lock);
1634262152Sluigi		}
1635262152Sluigicleanup:
1636262152Sluigi		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1637262152Sluigi		d->bq_len = 0;
1638262152Sluigi	}
1639262152Sluigi	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1640262152Sluigi	brddst->bq_len = 0;
1641262152Sluigi	return 0;
1642262152Sluigi}
1643262152Sluigi
1644270252Sluigi/* nm_txsync callback for VALE ports */
1645262152Sluigistatic int
1646270252Sluiginetmap_vp_txsync(struct netmap_kring *kring, int flags)
1647262152Sluigi{
1648270252Sluigi	struct netmap_vp_adapter *na =
1649270252Sluigi		(struct netmap_vp_adapter *)kring->na;
1650262152Sluigi	u_int done;
1651262152Sluigi	u_int const lim = kring->nkr_num_slots - 1;
1652262152Sluigi	u_int const cur = kring->rcur;
1653262152Sluigi
1654262152Sluigi	if (bridge_batch <= 0) { /* testing only */
1655262152Sluigi		done = cur; // used all
1656262152Sluigi		goto done;
1657262152Sluigi	}
1658270252Sluigi	if (!na->na_bdg) {
1659270252Sluigi		done = cur;
1660270252Sluigi		goto done;
1661270252Sluigi	}
1662262152Sluigi	if (bridge_batch > NM_BDG_BATCH)
1663262152Sluigi		bridge_batch = NM_BDG_BATCH;
1664262152Sluigi
1665270252Sluigi	done = nm_bdg_preflush(kring, cur);
1666262152Sluigidone:
1667262152Sluigi	if (done != cur)
1668262152Sluigi		D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail);
1669262152Sluigi	/*
1670262152Sluigi	 * packets between 'done' and 'cur' are left unsent.
1671262152Sluigi	 */
1672262152Sluigi	kring->nr_hwcur = done;
1673262152Sluigi	kring->nr_hwtail = nm_prev(done, lim);
1674262152Sluigi	nm_txsync_finalize(kring);
1675262152Sluigi	if (netmap_verbose)
1676270252Sluigi		D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
1677262152Sluigi	return 0;
1678262152Sluigi}
1679262152Sluigi
1680262152Sluigi
1681270252Sluigi/* rxsync code used by VALE ports nm_rxsync callback and also
1682270252Sluigi * internally by the brwap
1683262152Sluigi */
1684262152Sluigistatic int
1685270252Sluiginetmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
1686262152Sluigi{
1687270252Sluigi	struct netmap_adapter *na = kring->na;
1688262152Sluigi	struct netmap_ring *ring = kring->ring;
1689262152Sluigi	u_int nm_i, lim = kring->nkr_num_slots - 1;
1690262152Sluigi	u_int head = nm_rxsync_prologue(kring);
1691262152Sluigi	int n;
1692262152Sluigi
1693262152Sluigi	if (head > lim) {
1694262152Sluigi		D("ouch dangerous reset!!!");
1695262152Sluigi		n = netmap_ring_reinit(kring);
1696262152Sluigi		goto done;
1697262152Sluigi	}
1698262152Sluigi
1699262152Sluigi	/* First part, import newly received packets. */
1700262152Sluigi	/* actually nothing to do here, they are already in the kring */
1701262152Sluigi
1702262152Sluigi	/* Second part, skip past packets that userspace has released. */
1703262152Sluigi	nm_i = kring->nr_hwcur;
1704262152Sluigi	if (nm_i != head) {
1705262152Sluigi		/* consistency check, but nothing really important here */
1706262152Sluigi		for (n = 0; likely(nm_i != head); n++) {
1707262152Sluigi			struct netmap_slot *slot = &ring->slot[nm_i];
1708270252Sluigi			void *addr = NMB(na, slot);
1709262152Sluigi
1710270252Sluigi			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
1711262152Sluigi				D("bad buffer index %d, ignore ?",
1712262152Sluigi					slot->buf_idx);
1713262152Sluigi			}
1714262152Sluigi			slot->flags &= ~NS_BUF_CHANGED;
1715262152Sluigi			nm_i = nm_next(nm_i, lim);
1716262152Sluigi		}
1717262152Sluigi		kring->nr_hwcur = head;
1718262152Sluigi	}
1719262152Sluigi
1720262152Sluigi	/* tell userspace that there are new packets */
1721262152Sluigi	nm_rxsync_finalize(kring);
1722262152Sluigi	n = 0;
1723262152Sluigidone:
1724262152Sluigi	return n;
1725262152Sluigi}
1726262152Sluigi
1727262152Sluigi/*
1728270252Sluigi * nm_rxsync callback for VALE ports
1729262152Sluigi * user process reading from a VALE switch.
1730262152Sluigi * Already protected against concurrent calls from userspace,
1731262152Sluigi * but we must acquire the queue's lock to protect against
1732262152Sluigi * writers on the same queue.
1733262152Sluigi */
1734262152Sluigistatic int
1735270252Sluiginetmap_vp_rxsync(struct netmap_kring *kring, int flags)
1736262152Sluigi{
1737262152Sluigi	int n;
1738262152Sluigi
1739262152Sluigi	mtx_lock(&kring->q_lock);
1740270252Sluigi	n = netmap_vp_rxsync_locked(kring, flags);
1741262152Sluigi	mtx_unlock(&kring->q_lock);
1742262152Sluigi	return n;
1743262152Sluigi}
1744262152Sluigi
1745262152Sluigi
1746270252Sluigi/* nm_bdg_attach callback for VALE ports
1747270252Sluigi * The na_vp port is this same netmap_adapter. There is no host port.
1748270252Sluigi */
1749262152Sluigistatic int
1750270252Sluiginetmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
1751262152Sluigi{
1752270252Sluigi	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
1753270252Sluigi
1754270252Sluigi	if (vpna->na_bdg)
1755270252Sluigi		return EBUSY;
1756270252Sluigi	na->na_vp = vpna;
1757270252Sluigi	strncpy(na->name, name, sizeof(na->name));
1758270252Sluigi	na->na_hostvp = NULL;
1759270252Sluigi	return 0;
1760270252Sluigi}
1761270252Sluigi
1762270252Sluigi/* create a netmap_vp_adapter that describes a VALE port.
1763270252Sluigi * Only persistent VALE ports have a non-null ifp.
1764270252Sluigi */
1765270252Sluigistatic int
1766270252Sluiginetmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter **ret)
1767270252Sluigi{
1768262152Sluigi	struct netmap_vp_adapter *vpna;
1769262152Sluigi	struct netmap_adapter *na;
1770262152Sluigi	int error;
1771262152Sluigi	u_int npipes = 0;
1772262152Sluigi
1773262152Sluigi	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1774262152Sluigi	if (vpna == NULL)
1775262152Sluigi		return ENOMEM;
1776262152Sluigi
1777262152Sluigi 	na = &vpna->up;
1778262152Sluigi
1779262152Sluigi	na->ifp = ifp;
1780270252Sluigi	strncpy(na->name, nmr->nr_name, sizeof(na->name));
1781262152Sluigi
1782262152Sluigi	/* bound checking */
1783262152Sluigi	na->num_tx_rings = nmr->nr_tx_rings;
1784262152Sluigi	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1785262152Sluigi	nmr->nr_tx_rings = na->num_tx_rings; // write back
1786262152Sluigi	na->num_rx_rings = nmr->nr_rx_rings;
1787262152Sluigi	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1788262152Sluigi	nmr->nr_rx_rings = na->num_rx_rings; // write back
1789262152Sluigi	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1790262152Sluigi			1, NM_BDG_MAXSLOTS, NULL);
1791262152Sluigi	na->num_tx_desc = nmr->nr_tx_slots;
1792262152Sluigi	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1793262152Sluigi			1, NM_BDG_MAXSLOTS, NULL);
1794262152Sluigi	/* validate number of pipes. We want at least 1,
1795262152Sluigi	 * but probably can do with some more.
1796262152Sluigi	 * So let's use 2 as default (when 0 is supplied)
1797262152Sluigi	 */
1798262152Sluigi	npipes = nmr->nr_arg1;
1799262152Sluigi	nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
1800262152Sluigi	nmr->nr_arg1 = npipes;	/* write back */
1801262152Sluigi	/* validate extra bufs */
1802262152Sluigi	nm_bound_var(&nmr->nr_arg3, 0, 0,
1803262152Sluigi			128*NM_BDG_MAXSLOTS, NULL);
1804262152Sluigi	na->num_rx_desc = nmr->nr_rx_slots;
1805262152Sluigi	vpna->virt_hdr_len = 0;
1806262152Sluigi	vpna->mfs = 1514;
1807262152Sluigi	/*if (vpna->mfs > netmap_buf_size)  TODO netmap_buf_size is zero??
1808262152Sluigi		vpna->mfs = netmap_buf_size; */
1809262152Sluigi        if (netmap_verbose)
1810262152Sluigi		D("max frame size %u", vpna->mfs);
1811262152Sluigi
1812262152Sluigi	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
1813270252Sluigi	na->nm_txsync = netmap_vp_txsync;
1814270252Sluigi	na->nm_rxsync = netmap_vp_rxsync;
1815270252Sluigi	na->nm_register = netmap_vp_reg;
1816262152Sluigi	na->nm_krings_create = netmap_vp_krings_create;
1817262152Sluigi	na->nm_krings_delete = netmap_vp_krings_delete;
1818270252Sluigi	na->nm_dtor = netmap_vp_dtor;
1819270252Sluigi	na->nm_mem = netmap_mem_private_new(na->name,
1820262152Sluigi			na->num_tx_rings, na->num_tx_desc,
1821262152Sluigi			na->num_rx_rings, na->num_rx_desc,
1822262152Sluigi			nmr->nr_arg3, npipes, &error);
1823262152Sluigi	if (na->nm_mem == NULL)
1824262152Sluigi		goto err;
1825270252Sluigi	na->nm_bdg_attach = netmap_vp_bdg_attach;
1826262152Sluigi	/* other nmd fields are set in the common routine */
1827262152Sluigi	error = netmap_attach_common(na);
1828262152Sluigi	if (error)
1829262152Sluigi		goto err;
1830270252Sluigi	*ret = vpna;
1831262152Sluigi	return 0;
1832262152Sluigi
1833262152Sluigierr:
1834262152Sluigi	if (na->nm_mem != NULL)
1835262152Sluigi		netmap_mem_private_delete(na->nm_mem);
1836262152Sluigi	free(vpna, M_DEVBUF);
1837262152Sluigi	return error;
1838262152Sluigi}
1839262152Sluigi
1840270252Sluigi/* Bridge wrapper code (bwrap).
1841270252Sluigi * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
1842270252Sluigi * VALE switch.
1843270252Sluigi * The main task is to swap the meaning of tx and rx rings to match the
1844270252Sluigi * expectations of the VALE switch code (see nm_bdg_flush).
1845270252Sluigi *
1846270252Sluigi * The bwrap works by interposing a netmap_bwrap_adapter between the
1847270252Sluigi * rest of the system and the hwna. The netmap_bwrap_adapter looks like
 * a netmap_vp_adapter to the rest of the system, but, internally, it
1849270252Sluigi * translates all callbacks to what the hwna expects.
1850270252Sluigi *
1851270252Sluigi * Note that we have to intercept callbacks coming from two sides:
1852270252Sluigi *
1853270252Sluigi *  - callbacks coming from the netmap module are intercepted by
1854270252Sluigi *    passing around the netmap_bwrap_adapter instead of the hwna
1855270252Sluigi *
1856270252Sluigi *  - callbacks coming from outside of the netmap module only know
1857270252Sluigi *    about the hwna. This, however, only happens in interrupt
1858270252Sluigi *    handlers, where only the hwna->nm_notify callback is called.
1859270252Sluigi *    What the bwrap does is to overwrite the hwna->nm_notify callback
1860270252Sluigi *    with its own netmap_bwrap_intr_notify.
1861270252Sluigi *    XXX This assumes that the hwna->nm_notify callback was the
1862270252Sluigi *    standard netmap_notify(), as it is the case for nic adapters.
1863270252Sluigi *    Any additional action performed by hwna->nm_notify will not be
1864270252Sluigi *    performed by netmap_bwrap_intr_notify.
1865270252Sluigi *
1866270252Sluigi * Additionally, the bwrap can optionally attach the host rings pair
1867270252Sluigi * of the wrapped adapter to a different port of the switch.
1868270252Sluigi */
1869262152Sluigi
1870270252Sluigi
1871262152Sluigistatic void
1872262152Sluiginetmap_bwrap_dtor(struct netmap_adapter *na)
1873262152Sluigi{
1874262152Sluigi	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1875262152Sluigi	struct netmap_adapter *hwna = bna->hwna;
1876262152Sluigi
1877262152Sluigi	ND("na %p", na);
1878270252Sluigi	/* drop reference to hwna->ifp.
1879270252Sluigi	 * If we don't do this, netmap_detach_common(na)
1880270252Sluigi	 * will think it has set NA(na->ifp) to NULL
1881270252Sluigi	 */
1882270252Sluigi	na->ifp = NULL;
1883270252Sluigi	/* for safety, also drop the possible reference
1884270252Sluigi	 * in the hostna
1885270252Sluigi	 */
1886270252Sluigi	bna->host.up.ifp = NULL;
1887262152Sluigi
1888270252Sluigi	hwna->nm_mem = bna->save_nmd;
1889262152Sluigi	hwna->na_private = NULL;
1890270252Sluigi	hwna->na_vp = hwna->na_hostvp = NULL;
1891270252Sluigi	hwna->na_flags &= ~NAF_BUSY;
1892262152Sluigi	netmap_adapter_put(hwna);
1893262152Sluigi
1894262152Sluigi}
1895262152Sluigi
1896262152Sluigi
1897262152Sluigi/*
1898262152Sluigi * Intr callback for NICs connected to a bridge.
1899262152Sluigi * Simply ignore tx interrupts (maybe we could try to recover space ?)
1900262152Sluigi * and pass received packets from nic to the bridge.
1901262152Sluigi *
1902262152Sluigi * XXX TODO check locking: this is called from the interrupt
1903262152Sluigi * handler so we should make sure that the interface is not
1904262152Sluigi * disconnected while passing down an interrupt.
1905262152Sluigi *
1906262152Sluigi * Note, no user process can access this NIC or the host stack.
1907262152Sluigi * The only part of the ring that is significant are the slots,
1908262152Sluigi * and head/cur/tail are set from the kring as needed
1909262152Sluigi * (part as a receive ring, part as a transmit ring).
1910262152Sluigi *
1911262152Sluigi * callback that overwrites the hwna notify callback.
1912262152Sluigi * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1913262152Sluigi * The bridge wrapper then sends the packets through the bridge.
1914262152Sluigi */
1915262152Sluigistatic int
1916262152Sluiginetmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
1917262152Sluigi{
1918262152Sluigi	struct netmap_bwrap_adapter *bna = na->na_private;
1919262152Sluigi	struct netmap_vp_adapter *hostna = &bna->host;
1920262152Sluigi	struct netmap_kring *kring, *bkring;
1921262152Sluigi	struct netmap_ring *ring;
1922262152Sluigi	int is_host_ring = ring_nr == na->num_rx_rings;
1923262152Sluigi	struct netmap_vp_adapter *vpna = &bna->up;
1924262152Sluigi	int error = 0;
1925262152Sluigi
1926262152Sluigi	if (netmap_verbose)
1927270252Sluigi	    D("%s %s%d 0x%x", na->name,
1928262152Sluigi		(tx == NR_TX ? "TX" : "RX"), ring_nr, flags);
1929262152Sluigi
1930262152Sluigi	if (flags & NAF_DISABLE_NOTIFY) {
1931270252Sluigi		/* the enabled/disabled state of the ring has changed,
1932270252Sluigi		 * propagate the info to the wrapper (with tx/rx swapped)
1933270252Sluigi		 */
1934270252Sluigi		if (tx == NR_TX) {
1935270252Sluigi			netmap_set_rxring(&vpna->up, ring_nr,
1936270252Sluigi					na->tx_rings[ring_nr].nkr_stopped);
1937270252Sluigi		} else {
1938270252Sluigi			netmap_set_txring(&vpna->up, ring_nr,
1939270252Sluigi					na->rx_rings[ring_nr].nkr_stopped);
1940270252Sluigi		}
1941262152Sluigi		return 0;
1942262152Sluigi	}
1943262152Sluigi
1944270252Sluigi	if (!nm_netmap_on(na))
1945262152Sluigi		return 0;
1946262152Sluigi
1947262152Sluigi	/* we only care about receive interrupts */
1948262152Sluigi	if (tx == NR_TX)
1949262152Sluigi		return 0;
1950262152Sluigi
1951262152Sluigi	kring = &na->rx_rings[ring_nr];
1952262152Sluigi	ring = kring->ring;
1953262152Sluigi
1954262152Sluigi	/* make sure the ring is not disabled */
1955262152Sluigi	if (nm_kr_tryget(kring))
1956262152Sluigi		return 0;
1957262152Sluigi
1958262152Sluigi	if (is_host_ring && hostna->na_bdg == NULL) {
1959262152Sluigi		error = bna->save_notify(na, ring_nr, tx, flags);
1960262152Sluigi		goto put_out;
1961262152Sluigi	}
1962262152Sluigi
1963262152Sluigi	/* Here we expect ring->head = ring->cur = ring->tail
1964262152Sluigi	 * because everything has been released from the previous round.
1965262152Sluigi	 * However the ring is shared and we might have info from
1966262152Sluigi	 * the wrong side (the tx ring). Hence we overwrite with
1967262152Sluigi	 * the info from the rx kring.
1968262152Sluigi	 */
1969262152Sluigi	if (netmap_verbose)
1970270252Sluigi	    D("%s head %d cur %d tail %d (kring %d %d %d)",  na->name,
1971262152Sluigi		ring->head, ring->cur, ring->tail,
1972262152Sluigi		kring->rhead, kring->rcur, kring->rtail);
1973262152Sluigi
1974262152Sluigi	ring->head = kring->rhead;
1975262152Sluigi	ring->cur = kring->rcur;
1976262152Sluigi	ring->tail = kring->rtail;
1977262152Sluigi
1978262152Sluigi	if (is_host_ring) {
1979262152Sluigi		vpna = hostna;
1980262152Sluigi		ring_nr = 0;
1981267282Sluigi	}
1982262152Sluigi	/* simulate a user wakeup on the rx ring */
1983262152Sluigi	/* fetch packets that have arrived.
1984262152Sluigi	 * XXX maybe do this in a loop ?
1985262152Sluigi	 */
1986262152Sluigi	error = kring->nm_sync(kring, 0);
1987262152Sluigi	if (error)
1988262152Sluigi		goto put_out;
1989262152Sluigi	if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
1990262152Sluigi		D("how strange, interrupt with no packets on %s",
1991270252Sluigi			na->name);
1992262152Sluigi		goto put_out;
1993262152Sluigi	}
1994262152Sluigi
1995262152Sluigi	/* new packets are ring->cur to ring->tail, and the bkring
1996262152Sluigi	 * had hwcur == ring->cur. So advance ring->cur to ring->tail
1997262152Sluigi	 * to push all packets out.
1998262152Sluigi	 */
1999262152Sluigi	ring->head = ring->cur = ring->tail;
2000262152Sluigi
2001262152Sluigi	/* also set tail to what the bwrap expects */
2002262152Sluigi	bkring = &vpna->up.tx_rings[ring_nr];
2003262152Sluigi	ring->tail = bkring->nr_hwtail; // rtail too ?
2004262152Sluigi
2005262152Sluigi	/* pass packets to the switch */
2006262152Sluigi	nm_txsync_prologue(bkring); // XXX error checking ?
2007270252Sluigi	netmap_vp_txsync(bkring, flags);
2008262152Sluigi
2009262152Sluigi	/* mark all buffers as released on this ring */
2010262152Sluigi	ring->head = ring->cur = kring->nr_hwtail;
2011262152Sluigi	ring->tail = kring->rtail;
2012262152Sluigi	/* another call to actually release the buffers */
2013262152Sluigi	if (!is_host_ring) {
2014262152Sluigi		error = kring->nm_sync(kring, 0);
2015262152Sluigi	} else {
2016262152Sluigi		/* mark all packets as released, as in the
2017262152Sluigi		 * second part of netmap_rxsync_from_host()
2018262152Sluigi		 */
2019262152Sluigi		kring->nr_hwcur = kring->nr_hwtail;
2020262152Sluigi		nm_rxsync_finalize(kring);
2021262152Sluigi	}
2022262152Sluigi
2023262152Sluigiput_out:
2024262152Sluigi	nm_kr_put(kring);
2025262152Sluigi	return error;
2026262152Sluigi}
2027262152Sluigi
2028262152Sluigi
2029270252Sluigi/* nm_register callback for bwrap */
2030262152Sluigistatic int
2031262152Sluiginetmap_bwrap_register(struct netmap_adapter *na, int onoff)
2032262152Sluigi{
2033262152Sluigi	struct netmap_bwrap_adapter *bna =
2034262152Sluigi		(struct netmap_bwrap_adapter *)na;
2035262152Sluigi	struct netmap_adapter *hwna = bna->hwna;
2036262152Sluigi	struct netmap_vp_adapter *hostna = &bna->host;
2037262152Sluigi	int error;
2038262152Sluigi
2039270252Sluigi	ND("%s %s", na->name, onoff ? "on" : "off");
2040262152Sluigi
2041262152Sluigi	if (onoff) {
2042262152Sluigi		int i;
2043262152Sluigi
2044270252Sluigi		/* netmap_do_regif has been called on the bwrap na.
2045270252Sluigi		 * We need to pass the information about the
2046270252Sluigi		 * memory allocator down to the hwna before
2047270252Sluigi		 * putting it in netmap mode
2048270252Sluigi		 */
2049262152Sluigi		hwna->na_lut = na->na_lut;
2050262152Sluigi		hwna->na_lut_objtotal = na->na_lut_objtotal;
2051270252Sluigi		hwna->na_lut_objsize = na->na_lut_objsize;
2052262152Sluigi
2053262152Sluigi		if (hostna->na_bdg) {
2054270252Sluigi			/* if the host rings have been attached to switch,
2055270252Sluigi			 * we need to copy the memory allocator information
2056270252Sluigi			 * in the hostna also
2057270252Sluigi			 */
2058262152Sluigi			hostna->up.na_lut = na->na_lut;
2059262152Sluigi			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
2060270252Sluigi			hostna->up.na_lut_objsize = na->na_lut_objsize;
2061262152Sluigi		}
2062262152Sluigi
2063262152Sluigi		/* cross-link the netmap rings
2064262152Sluigi		 * The original number of rings comes from hwna,
2065262152Sluigi		 * rx rings on one side equals tx rings on the other.
2066270252Sluigi		 * We need to do this now, after the initialization
2067270252Sluigi		 * of the kring->ring pointers
2068262152Sluigi		 */
2069262152Sluigi		for (i = 0; i < na->num_rx_rings + 1; i++) {
2070262152Sluigi			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
2071262152Sluigi			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
2072262152Sluigi		}
2073262152Sluigi		for (i = 0; i < na->num_tx_rings + 1; i++) {
2074262152Sluigi			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
2075262152Sluigi			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
2076262152Sluigi		}
2077262152Sluigi	}
2078262152Sluigi
2079270252Sluigi	/* forward the request to the hwna */
2080270252Sluigi	error = hwna->nm_register(hwna, onoff);
2081270252Sluigi	if (error)
2082270252Sluigi		return error;
2083262152Sluigi
2084270252Sluigi	/* impersonate a netmap_vp_adapter */
2085270252Sluigi	netmap_vp_reg(na, onoff);
2086270252Sluigi	if (hostna->na_bdg)
2087270252Sluigi		netmap_vp_reg(&hostna->up, onoff);
2088262152Sluigi
2089262152Sluigi	if (onoff) {
2090270252Sluigi		/* intercept the hwna nm_nofify callback */
2091262152Sluigi		bna->save_notify = hwna->nm_notify;
2092262152Sluigi		hwna->nm_notify = netmap_bwrap_intr_notify;
2093262152Sluigi	} else {
2094262152Sluigi		hwna->nm_notify = bna->save_notify;
2095262152Sluigi		hwna->na_lut = NULL;
2096262152Sluigi		hwna->na_lut_objtotal = 0;
2097270252Sluigi		hwna->na_lut_objsize = 0;
2098262152Sluigi	}
2099262152Sluigi
2100262152Sluigi	return 0;
2101262152Sluigi}
2102262152Sluigi
2103270252Sluigi/* nm_config callback for bwrap */
2104262152Sluigistatic int
2105262152Sluiginetmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
2106262152Sluigi				    u_int *rxr, u_int *rxd)
2107262152Sluigi{
2108262152Sluigi	struct netmap_bwrap_adapter *bna =
2109262152Sluigi		(struct netmap_bwrap_adapter *)na;
2110262152Sluigi	struct netmap_adapter *hwna = bna->hwna;
2111262152Sluigi
2112262152Sluigi	/* forward the request */
2113262152Sluigi	netmap_update_config(hwna);
2114262152Sluigi	/* swap the results */
2115262152Sluigi	*txr = hwna->num_rx_rings;
2116262152Sluigi	*txd = hwna->num_rx_desc;
2117262152Sluigi	*rxr = hwna->num_tx_rings;
2118262152Sluigi	*rxd = hwna->num_rx_desc;
2119262152Sluigi
2120262152Sluigi	return 0;
2121262152Sluigi}
2122262152Sluigi
2123262152Sluigi
2124270252Sluigi/* nm_krings_create callback for bwrap */
2125262152Sluigistatic int
2126262152Sluiginetmap_bwrap_krings_create(struct netmap_adapter *na)
2127262152Sluigi{
2128262152Sluigi	struct netmap_bwrap_adapter *bna =
2129262152Sluigi		(struct netmap_bwrap_adapter *)na;
2130262152Sluigi	struct netmap_adapter *hwna = bna->hwna;
2131262152Sluigi	struct netmap_adapter *hostna = &bna->host.up;
2132262152Sluigi	int error;
2133262152Sluigi
2134270252Sluigi	ND("%s", na->name);
2135262152Sluigi
2136270252Sluigi	/* impersonate a netmap_vp_adapter */
2137262152Sluigi	error = netmap_vp_krings_create(na);
2138262152Sluigi	if (error)
2139262152Sluigi		return error;
2140262152Sluigi
2141270252Sluigi	/* also create the hwna krings */
2142262152Sluigi	error = hwna->nm_krings_create(hwna);
2143262152Sluigi	if (error) {
2144262152Sluigi		netmap_vp_krings_delete(na);
2145262152Sluigi		return error;
2146262152Sluigi	}
2147270252Sluigi	/* the connection between the bwrap krings and the hwna krings
2148270252Sluigi	 * will be perfomed later, in the nm_register callback, since
2149270252Sluigi	 * now the kring->ring pointers have not been initialized yet
2150270252Sluigi	 */
2151262152Sluigi
2152262152Sluigi	if (na->na_flags & NAF_HOST_RINGS) {
2153270252Sluigi		/* the hostna rings are the host rings of the bwrap.
2154270252Sluigi		 * The corresponding krings must point back to the
2155270252Sluigi		 * hostna
2156270252Sluigi		 */
2157262152Sluigi		hostna->tx_rings = na->tx_rings + na->num_tx_rings;
2158270252Sluigi		hostna->tx_rings[0].na = hostna;
2159262152Sluigi		hostna->rx_rings = na->rx_rings + na->num_rx_rings;
2160270252Sluigi		hostna->rx_rings[0].na = hostna;
2161262152Sluigi	}
2162262152Sluigi
2163262152Sluigi	return 0;
2164262152Sluigi}
2165262152Sluigi
2166262152Sluigi
2167262152Sluigistatic void
2168262152Sluiginetmap_bwrap_krings_delete(struct netmap_adapter *na)
2169262152Sluigi{
2170262152Sluigi	struct netmap_bwrap_adapter *bna =
2171262152Sluigi		(struct netmap_bwrap_adapter *)na;
2172262152Sluigi	struct netmap_adapter *hwna = bna->hwna;
2173262152Sluigi
2174270252Sluigi	ND("%s", na->name);
2175262152Sluigi
2176262152Sluigi	hwna->nm_krings_delete(hwna);
2177262152Sluigi	netmap_vp_krings_delete(na);
2178262152Sluigi}
2179262152Sluigi
2180262152Sluigi
2181262152Sluigi/* notify method for the bridge-->hwna direction */
2182262152Sluigistatic int
2183262152Sluiginetmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
2184262152Sluigi{
2185262152Sluigi	struct netmap_bwrap_adapter *bna =
2186262152Sluigi		(struct netmap_bwrap_adapter *)na;
2187262152Sluigi	struct netmap_adapter *hwna = bna->hwna;
2188262152Sluigi	struct netmap_kring *kring, *hw_kring;
2189262152Sluigi	struct netmap_ring *ring;
2190262152Sluigi	u_int lim;
2191262152Sluigi	int error = 0;
2192262152Sluigi
2193262152Sluigi	if (tx == NR_TX)
2194262152Sluigi	        return EINVAL;
2195262152Sluigi
2196262152Sluigi	kring = &na->rx_rings[ring_n];
2197262152Sluigi	hw_kring = &hwna->tx_rings[ring_n];
2198262152Sluigi	ring = kring->ring;
2199262152Sluigi	lim = kring->nkr_num_slots - 1;
2200262152Sluigi
2201270252Sluigi	if (!nm_netmap_on(hwna))
2202262152Sluigi		return 0;
2203262152Sluigi	mtx_lock(&kring->q_lock);
2204262152Sluigi	/* first step: simulate a user wakeup on the rx ring */
2205270252Sluigi	netmap_vp_rxsync_locked(kring, flags);
2206262152Sluigi	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2207270252Sluigi		na->name, ring_n,
2208262152Sluigi		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
2209262152Sluigi		ring->head, ring->cur, ring->tail,
2210262152Sluigi		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
2211262152Sluigi	/* second step: the simulated user consumes all new packets */
2212262152Sluigi	ring->head = ring->cur = ring->tail;
2213262152Sluigi
2214262152Sluigi	/* third step: the new packets are sent on the tx ring
2215262152Sluigi	 * (which is actually the same ring)
2216262152Sluigi	 */
2217262152Sluigi	/* set tail to what the hw expects */
2218262152Sluigi	ring->tail = hw_kring->rtail;
2219262152Sluigi	nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
2220262152Sluigi	error = hw_kring->nm_sync(hw_kring, flags);
2221262152Sluigi
2222262152Sluigi	/* fourth step: now we are back the rx ring */
2223262152Sluigi	/* claim ownership on all hw owned bufs */
2224262152Sluigi	ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */
2225262152Sluigi	ring->tail = kring->rtail; /* restore saved value of tail, for safety */
2226262152Sluigi
2227262152Sluigi	/* fifth step: the user goes to sleep again, causing another rxsync */
2228270252Sluigi	netmap_vp_rxsync_locked(kring, flags);
2229262152Sluigi	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2230270252Sluigi		na->name, ring_n,
2231262152Sluigi		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
2232262152Sluigi		ring->head, ring->cur, ring->tail,
2233262152Sluigi		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
2234262152Sluigi	mtx_unlock(&kring->q_lock);
2235262152Sluigi	return error;
2236262152Sluigi}
2237262152Sluigi
2238262152Sluigi
2239270252Sluigi/* notify method for the bridge-->host-rings path */
2240262152Sluigistatic int
2241262152Sluiginetmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
2242262152Sluigi{
2243262152Sluigi	struct netmap_bwrap_adapter *bna = na->na_private;
2244262152Sluigi	struct netmap_adapter *port_na = &bna->up.up;
2245262152Sluigi	if (tx == NR_TX || ring_n != 0)
2246262152Sluigi		return EINVAL;
2247262152Sluigi	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
2248262152Sluigi}
2249262152Sluigi
2250262152Sluigi
2251270252Sluigi/* nm_bdg_ctl callback for the bwrap.
2252270252Sluigi * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
2253270252Sluigi * On attach, it needs to provide a fake netmap_priv_d structure and
2254270252Sluigi * perform a netmap_do_regif() on the bwrap. This will put both the
2255270252Sluigi * bwrap and the hwna in netmap mode, with the netmap rings shared
2256270252Sluigi * and cross linked. Moroever, it will start intercepting interrupts
2257270252Sluigi * directed to hwna.
2258270252Sluigi */
2259262152Sluigistatic int
2260270252Sluiginetmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
2261262152Sluigi{
2262270252Sluigi	struct netmap_priv_d *npriv;
2263270252Sluigi	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
2264270252Sluigi	struct netmap_if *nifp;
2265270252Sluigi	int error = 0;
2266270252Sluigi
2267270252Sluigi	if (attach) {
2268270252Sluigi		if (NETMAP_OWNED_BY_ANY(na)) {
2269270252Sluigi			return EBUSY;
2270270252Sluigi		}
2271270252Sluigi		if (bna->na_kpriv) {
2272270252Sluigi			/* nothing to do */
2273270252Sluigi			return 0;
2274270252Sluigi		}
2275270252Sluigi		npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
2276270252Sluigi		if (npriv == NULL)
2277270252Sluigi			return ENOMEM;
2278270252Sluigi		nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error);
2279270252Sluigi		if (!nifp) {
2280270252Sluigi			bzero(npriv, sizeof(*npriv));
2281270252Sluigi			free(npriv, M_DEVBUF);
2282270252Sluigi			return error;
2283270252Sluigi		}
2284270252Sluigi		bna->na_kpriv = npriv;
2285270252Sluigi		na->na_flags |= NAF_BUSY;
2286270252Sluigi	} else {
2287270252Sluigi		int last_instance;
2288270252Sluigi
2289270252Sluigi		if (na->active_fds == 0) /* not registered */
2290270252Sluigi			return EINVAL;
2291270252Sluigi		last_instance = netmap_dtor_locked(bna->na_kpriv);
2292270252Sluigi		if (!last_instance) {
2293270252Sluigi			D("--- error, trying to detach an entry with active mmaps");
2294270252Sluigi			error = EINVAL;
2295270252Sluigi		} else {
2296270252Sluigi			struct nm_bridge *b = bna->up.na_bdg,
2297270252Sluigi				*bh = bna->host.na_bdg;
2298270252Sluigi			npriv = bna->na_kpriv;
2299270252Sluigi			bna->na_kpriv = NULL;
2300270252Sluigi			D("deleting priv");
2301270252Sluigi
2302270252Sluigi			bzero(npriv, sizeof(*npriv));
2303270252Sluigi			free(npriv, M_DEVBUF);
2304270252Sluigi			if (b) {
2305270252Sluigi				/* XXX the bwrap dtor should take care
2306270252Sluigi				 * of this (2014-06-16)
2307270252Sluigi				 */
2308270252Sluigi				netmap_bdg_detach_common(b, bna->up.bdg_port,
2309270252Sluigi				    (bh ? bna->host.bdg_port : -1));
2310270252Sluigi			}
2311270252Sluigi			na->na_flags &= ~NAF_BUSY;
2312270252Sluigi		}
2313270252Sluigi	}
2314270252Sluigi	return error;
2315270252Sluigi
2316270252Sluigi}
2317270252Sluigi
2318270252Sluigi/* attach a bridge wrapper to the 'real' device */
2319270252Sluigiint
2320270252Sluiginetmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
2321270252Sluigi{
2322262152Sluigi	struct netmap_bwrap_adapter *bna;
2323270252Sluigi	struct netmap_adapter *na = NULL;
2324270252Sluigi	struct netmap_adapter *hostna = NULL;
2325270252Sluigi	int error = 0;
2326262152Sluigi
2327270252Sluigi	/* make sure the NIC is not already in use */
2328270252Sluigi	if (NETMAP_OWNED_BY_ANY(hwna)) {
2329270252Sluigi		D("NIC %s busy, cannot attach to bridge", hwna->name);
2330270252Sluigi		return EBUSY;
2331270252Sluigi	}
2332262152Sluigi
2333262152Sluigi	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
2334270252Sluigi	if (bna == NULL) {
2335262152Sluigi		return ENOMEM;
2336270252Sluigi	}
2337262152Sluigi
2338262152Sluigi	na = &bna->up.up;
2339270252Sluigi	strncpy(na->name, nr_name, sizeof(na->name));
2340262152Sluigi	/* fill the ring data for the bwrap adapter with rx/tx meanings
2341262152Sluigi	 * swapped. The real cross-linking will be done during register,
2342262152Sluigi	 * when all the krings will have been created.
2343262152Sluigi	 */
2344262152Sluigi	na->num_rx_rings = hwna->num_tx_rings;
2345262152Sluigi	na->num_tx_rings = hwna->num_rx_rings;
2346262152Sluigi	na->num_tx_desc = hwna->num_rx_desc;
2347262152Sluigi	na->num_rx_desc = hwna->num_tx_desc;
2348262152Sluigi	na->nm_dtor = netmap_bwrap_dtor;
2349262152Sluigi	na->nm_register = netmap_bwrap_register;
2350262152Sluigi	// na->nm_txsync = netmap_bwrap_txsync;
2351262152Sluigi	// na->nm_rxsync = netmap_bwrap_rxsync;
2352262152Sluigi	na->nm_config = netmap_bwrap_config;
2353262152Sluigi	na->nm_krings_create = netmap_bwrap_krings_create;
2354262152Sluigi	na->nm_krings_delete = netmap_bwrap_krings_delete;
2355262152Sluigi	na->nm_notify = netmap_bwrap_notify;
2356270252Sluigi	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
2357270252Sluigi	na->pdev = hwna->pdev;
2358270252Sluigi	na->nm_mem = netmap_mem_private_new(na->name,
2359270252Sluigi			na->num_tx_rings, na->num_tx_desc,
2360270252Sluigi			na->num_rx_rings, na->num_rx_desc,
2361270252Sluigi			0, 0, &error);
2362270252Sluigi	na->na_flags |= NAF_MEM_OWNER;
2363270252Sluigi	if (na->nm_mem == NULL)
2364270252Sluigi		goto err_put;
2365262152Sluigi	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
2366262152Sluigi
2367262152Sluigi	bna->hwna = hwna;
2368262152Sluigi	netmap_adapter_get(hwna);
2369262152Sluigi	hwna->na_private = bna; /* weak reference */
2370270252Sluigi	hwna->na_vp = &bna->up;
2371270252Sluigi
2372262152Sluigi	if (hwna->na_flags & NAF_HOST_RINGS) {
2373270252Sluigi		if (hwna->na_flags & NAF_SW_ONLY)
2374270252Sluigi			na->na_flags |= NAF_SW_ONLY;
2375262152Sluigi		na->na_flags |= NAF_HOST_RINGS;
2376262152Sluigi		hostna = &bna->host.up;
2377270252Sluigi		snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name);
2378262152Sluigi		hostna->ifp = hwna->ifp;
2379262152Sluigi		hostna->num_tx_rings = 1;
2380262152Sluigi		hostna->num_tx_desc = hwna->num_rx_desc;
2381262152Sluigi		hostna->num_rx_rings = 1;
2382262152Sluigi		hostna->num_rx_desc = hwna->num_tx_desc;
2383262152Sluigi		// hostna->nm_txsync = netmap_bwrap_host_txsync;
2384262152Sluigi		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
2385262152Sluigi		hostna->nm_notify = netmap_bwrap_host_notify;
2386262152Sluigi		hostna->nm_mem = na->nm_mem;
2387262152Sluigi		hostna->na_private = bna;
2388270252Sluigi		hostna->na_vp = &bna->up;
2389270252Sluigi		na->na_hostvp = hwna->na_hostvp =
2390270252Sluigi			hostna->na_hostvp = &bna->host;
2391270252Sluigi		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
2392262152Sluigi	}
2393262152Sluigi
2394262152Sluigi	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
2395270252Sluigi		na->name, ifp->if_xname,
2396262152Sluigi		na->num_tx_rings, na->num_tx_desc,
2397262152Sluigi		na->num_rx_rings, na->num_rx_desc);
2398262152Sluigi
2399262152Sluigi	error = netmap_attach_common(na);
2400262152Sluigi	if (error) {
2401270252Sluigi		goto err_free;
2402262152Sluigi	}
2403270252Sluigi	/* make bwrap ifp point to the real ifp
2404270252Sluigi	 * NOTE: netmap_attach_common() interprets a non-NULL na->ifp
2405270252Sluigi	 * as a request to make the ifp point to the na. Since we
2406270252Sluigi	 * do not want to change the na already pointed to by hwna->ifp,
2407270252Sluigi	 * the following assignment has to be delayed until now
2408270252Sluigi	 */
2409270252Sluigi	na->ifp = hwna->ifp;
2410270252Sluigi	hwna->na_flags |= NAF_BUSY;
2411270252Sluigi	/* make hwna point to the allocator we are actually using,
2412270252Sluigi	 * so that monitors will be able to find it
2413270252Sluigi	 */
2414270252Sluigi	bna->save_nmd = hwna->nm_mem;
2415270252Sluigi	hwna->nm_mem = na->nm_mem;
2416262152Sluigi	return 0;
2417270252Sluigi
2418270252Sluigierr_free:
2419270252Sluigi	netmap_mem_private_delete(na->nm_mem);
2420270252Sluigierr_put:
2421270252Sluigi	hwna->na_vp = hwna->na_hostvp = NULL;
2422270252Sluigi	netmap_adapter_put(hwna);
2423270252Sluigi	free(bna, M_DEVBUF);
2424270252Sluigi	return error;
2425270252Sluigi
2426262152Sluigi}
2427262152Sluigi
2428262152Sluigi
2429262152Sluigivoid
2430262152Sluiginetmap_init_bridges(void)
2431262152Sluigi{
2432262152Sluigi	int i;
2433262152Sluigi	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
2434262152Sluigi	for (i = 0; i < NM_BRIDGES; i++)
2435262152Sluigi		BDG_RWINIT(&nm_bridges[i]);
2436262152Sluigi}
2437262152Sluigi#endif /* WITH_VALE */
2438