1331722Seadler/*
2341477Svmaffione * Copyright (C) 2013-2016 Universita` di Pisa
3341477Svmaffione * All rights reserved.
4259412Sluigi *
5259412Sluigi * Redistribution and use in source and binary forms, with or without
6259412Sluigi * modification, are permitted provided that the following conditions
7259412Sluigi * are met:
8259412Sluigi *   1. Redistributions of source code must retain the above copyright
9259412Sluigi *      notice, this list of conditions and the following disclaimer.
10259412Sluigi *   2. Redistributions in binary form must reproduce the above copyright
11259412Sluigi *      notice, this list of conditions and the following disclaimer in the
12259412Sluigi *      documentation and/or other materials provided with the distribution.
13259412Sluigi *
14259412Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15259412Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16259412Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17259412Sluigi * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18259412Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19259412Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20259412Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21259412Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22259412Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23259412Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24259412Sluigi * SUCH DAMAGE.
25259412Sluigi */
26259412Sluigi
27259412Sluigi
28259412Sluigi#if defined(__FreeBSD__)
29259412Sluigi#include <sys/cdefs.h> /* prerequisite */
30259412Sluigi__FBSDID("$FreeBSD: stable/11/sys/dev/netmap/netmap_vale.c 344047 2019-02-12 09:26:05Z vmaffione $");
31259412Sluigi
32259412Sluigi#include <sys/types.h>
33259412Sluigi#include <sys/errno.h>
34259412Sluigi#include <sys/param.h>	/* defines used in kernel.h */
35259412Sluigi#include <sys/kernel.h>	/* types used in module initialization */
36259412Sluigi#include <sys/conf.h>	/* cdevsw struct, UID, GID */
37259412Sluigi#include <sys/sockio.h>
38259412Sluigi#include <sys/socketvar.h>	/* struct socket */
39259412Sluigi#include <sys/malloc.h>
40259412Sluigi#include <sys/poll.h>
41259412Sluigi#include <sys/rwlock.h>
42259412Sluigi#include <sys/socket.h> /* sockaddrs */
43259412Sluigi#include <sys/selinfo.h>
44259412Sluigi#include <sys/sysctl.h>
45259412Sluigi#include <net/if.h>
46259412Sluigi#include <net/if_var.h>
47259412Sluigi#include <net/bpf.h>		/* BIOCIMMEDIATE */
48259412Sluigi#include <machine/bus.h>	/* bus_dmamap_* */
49259412Sluigi#include <sys/endian.h>
50259412Sluigi#include <sys/refcount.h>
51341477Svmaffione#include <sys/smp.h>
52259412Sluigi
53259412Sluigi
54259412Sluigi#elif defined(linux)
55259412Sluigi
56259412Sluigi#include "bsd_glue.h"
57259412Sluigi
58259412Sluigi#elif defined(__APPLE__)
59259412Sluigi
60259412Sluigi#warning OSX support is only partial
61259412Sluigi#include "osx_glue.h"
62259412Sluigi
63341477Svmaffione#elif defined(_WIN32)
64341477Svmaffione#include "win_glue.h"
65341477Svmaffione
66259412Sluigi#else
67259412Sluigi
68259412Sluigi#error	Unsupported platform
69259412Sluigi
70259412Sluigi#endif /* unsupported */
71259412Sluigi
72259412Sluigi/*
73259412Sluigi * common headers
74259412Sluigi */
75259412Sluigi
76259412Sluigi#include <net/netmap.h>
77259412Sluigi#include <dev/netmap/netmap_kern.h>
78259412Sluigi#include <dev/netmap/netmap_mem2.h>
79341477Svmaffione#include <dev/netmap/netmap_bdg.h>
80259412Sluigi
81259412Sluigi#ifdef WITH_VALE
82259412Sluigi
/*
 * System parameters (most of them are defined in netmap_kern.h):
 *   NM_BDG_NAME	prefix for switch port names, default "vale"
 *   NM_BDG_MAXPORTS	number of ports
 *   NM_BRIDGES		max number of switches in the system.
 *			XXX should become a sysctl or tunable
 *
 * Switch ports are named valeX:Y, where X is the switch name and Y
 * is the port. If Y matches a physical interface name, the port is
 * connected to a physical device.
 *
 * Unlike physical interfaces, switch ports use their own memory region
 * for rings and buffers.
 * The virtual interfaces use a per-queue lock instead of the core lock.
 * In the tx loop, traffic is aggregated in batches to make all
 * operations faster. The batch size is bridge_batch.
 */
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
/*
 * Actual size of the tables: one full batch plus NETMAP_MAX_FRAGS
 * additional entries.
 */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NETMAP_MAX_FRAGS)
/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
108259412Sluigi
109259412Sluigi
110259412Sluigi/*
111259412Sluigi * bridge_batch is set via sysctl to the max batch size to be
112259412Sluigi * used in the bridge. The actual value may be larger as the
113259412Sluigi * last packet in the block may overflow the size.
114259412Sluigi */
115341477Svmaffionestatic int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
116341477SvmaffioneSYSBEGIN(vars_vale);
117259412SluigiSYSCTL_DECL(_dev_netmap);
118341477SvmaffioneSYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0,
119341477Svmaffione		"Max batch size to be used in the bridge");
120341477SvmaffioneSYSEND;
121259412Sluigi
/* Forward declarations of the callbacks defined further down in this file. */
static int netmap_vale_vp_create(struct nmreq_header *hdr,
		struct ifnet *ifp, struct netmap_mem_d *nmd,
		struct netmap_vp_adapter **ret);
static int netmap_vale_vp_bdg_attach(const char *name,
		struct netmap_adapter *na, struct nm_bridge *b);
static int netmap_vale_bwrap_attach(const char *name,
		struct netmap_adapter *na);
127259412Sluigi
/*
 * One nm_vale_q per output interface is used to build the list of
 * forwarding-table slots destined to it.
 * bq_len counts output buffers rather than packets, since buffers
 * can be coalesced during the copy.
 */
struct nm_vale_q {
	uint16_t bq_head;	/* first entry of the list, NM_FT_NULL if empty */
	uint16_t bq_tail;	/* last entry of the list, NM_FT_NULL if empty */
	uint32_t bq_len;	/* number of buffers */
};
138259412Sluigi
139341477Svmaffione/* Holds the default callbacks */
140341477Svmaffionestruct netmap_bdg_ops vale_bdg_ops = {
141342033Svmaffione	.lookup = netmap_vale_learning,
142341477Svmaffione	.config = NULL,
143341477Svmaffione	.dtor = NULL,
144342033Svmaffione	.vp_create = netmap_vale_vp_create,
145341477Svmaffione	.bwrap_attach = netmap_vale_bwrap_attach,
146341477Svmaffione	.name = NM_BDG_NAME,
147259412Sluigi};
148259412Sluigi
/*
 * Slightly optimized copy routine for non-overlapping buffers.
 *
 * Lengths that are a multiple of 64 and smaller than 1024 are copied
 * with an unrolled loop moving 64 bytes (eight 64-bit words) per
 * iteration, which is often faster than dealing with odd sizes.
 * Any other positive length is handed to memcpy(), so that exactly
 * l bytes are written: the unrolled loop would otherwise round the
 * length up to the next multiple of 64 and run past the end of both
 * buffers. A non-positive length copies nothing.
 *
 * _src and _dst must be suitably aligned for 64-bit accesses.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *src = _src;
	uint64_t *dst = _dst;

	if (unlikely(l <= 0))
		return;
	if (unlikely(l >= 1024 || (l & 63) != 0)) {
		memcpy(dst, src, (size_t)l);
		return;
	}
	for (; likely(l > 0); l -= 64) {
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
	}
}
177259412Sluigi
178259412Sluigi
179259412Sluigi/*
180259412Sluigi * Free the forwarding tables for rings attached to switch ports.
181259412Sluigi */
182259412Sluigistatic void
183259412Sluiginm_free_bdgfwd(struct netmap_adapter *na)
184259412Sluigi{
185259412Sluigi	int nrings, i;
186341477Svmaffione	struct netmap_kring **kring;
187259412Sluigi
188259412Sluigi	NMG_LOCK_ASSERT();
189260368Sluigi	nrings = na->num_tx_rings;
190260368Sluigi	kring = na->tx_rings;
191259412Sluigi	for (i = 0; i < nrings; i++) {
192341477Svmaffione		if (kring[i]->nkr_ft) {
193341477Svmaffione			nm_os_free(kring[i]->nkr_ft);
194341477Svmaffione			kring[i]->nkr_ft = NULL; /* protect from freeing twice */
195259412Sluigi		}
196259412Sluigi	}
197259412Sluigi}
198259412Sluigi
199259412Sluigi
200259412Sluigi/*
201259412Sluigi * Allocate the forwarding tables for the rings attached to the bridge ports.
202259412Sluigi */
203259412Sluigistatic int
204259412Sluiginm_alloc_bdgfwd(struct netmap_adapter *na)
205259412Sluigi{
206259412Sluigi	int nrings, l, i, num_dstq;
207341477Svmaffione	struct netmap_kring **kring;
208259412Sluigi
209259412Sluigi	NMG_LOCK_ASSERT();
210259412Sluigi	/* all port:rings + broadcast */
211259412Sluigi	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
212259412Sluigi	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
213342033Svmaffione	l += sizeof(struct nm_vale_q) * num_dstq;
214259412Sluigi	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
215259412Sluigi
216285349Sluigi	nrings = netmap_real_rings(na, NR_TX);
217259412Sluigi	kring = na->tx_rings;
218259412Sluigi	for (i = 0; i < nrings; i++) {
219259412Sluigi		struct nm_bdg_fwd *ft;
220342033Svmaffione		struct nm_vale_q *dstq;
221259412Sluigi		int j;
222259412Sluigi
223341477Svmaffione		ft = nm_os_malloc(l);
224259412Sluigi		if (!ft) {
225259412Sluigi			nm_free_bdgfwd(na);
226259412Sluigi			return ENOMEM;
227259412Sluigi		}
228342033Svmaffione		dstq = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX);
229259412Sluigi		for (j = 0; j < num_dstq; j++) {
230259412Sluigi			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
231259412Sluigi			dstq[j].bq_len = 0;
232259412Sluigi		}
233341477Svmaffione		kring[i]->nkr_ft = ft;
234259412Sluigi	}
235259412Sluigi	return 0;
236259412Sluigi}
237259412Sluigi
238341477Svmaffione/* Allows external modules to create bridges in exclusive mode,
239341477Svmaffione * returns an authentication token that the external module will need
240341477Svmaffione * to provide during nm_bdg_ctl_{attach, detach}(), netmap_bdg_regops(),
241341477Svmaffione * and nm_bdg_update_private_data() operations.
242341477Svmaffione * Successfully executed if ret != NULL and *return_status == 0.
243270063Sluigi */
244341477Svmaffionevoid *
245341477Svmaffionenetmap_vale_create(const char *bdg_name, int *return_status)
246259412Sluigi{
247341477Svmaffione	struct nm_bridge *b = NULL;
248341477Svmaffione	void *ret = NULL;
249259412Sluigi
250341477Svmaffione	NMG_LOCK();
251341477Svmaffione	b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL);
252270063Sluigi	if (b) {
253341477Svmaffione		*return_status = EEXIST;
254341477Svmaffione		goto unlock_bdg_create;
255270063Sluigi	}
256270063Sluigi
257341477Svmaffione	b = nm_find_bridge(bdg_name, 1 /* create */, &vale_bdg_ops);
258341477Svmaffione	if (!b) {
259341477Svmaffione		*return_status = ENOMEM;
260341477Svmaffione		goto unlock_bdg_create;
261259412Sluigi	}
262259412Sluigi
263341477Svmaffione	b->bdg_flags |= NM_BDG_ACTIVE | NM_BDG_EXCLUSIVE;
264341477Svmaffione	ret = nm_bdg_get_auth_token(b);
265341477Svmaffione	*return_status = 0;
266260368Sluigi
267341477Svmaffioneunlock_bdg_create:
268270063Sluigi	NMG_UNLOCK();
269341477Svmaffione	return ret;
270270063Sluigi}
271270063Sluigi
272341477Svmaffione/* Allows external modules to destroy a bridge created through
273341477Svmaffione * netmap_bdg_create(), the bridge must be empty.
274270063Sluigi */
275341477Svmaffioneint
276341477Svmaffionenetmap_vale_destroy(const char *bdg_name, void *auth_token)
277270063Sluigi{
278341477Svmaffione	struct nm_bridge *b = NULL;
279341477Svmaffione	int ret = 0;
280270063Sluigi
281270063Sluigi	NMG_LOCK();
282341477Svmaffione	b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL);
283341477Svmaffione	if (!b) {
284341477Svmaffione		ret = ENXIO;
285341477Svmaffione		goto unlock_bdg_free;
286270063Sluigi	}
287270063Sluigi
288341477Svmaffione	if (!nm_bdg_valid_auth_token(b, auth_token)) {
289341477Svmaffione		ret = EACCES;
290341477Svmaffione		goto unlock_bdg_free;
291259412Sluigi	}
292341477Svmaffione	if (!(b->bdg_flags & NM_BDG_EXCLUSIVE)) {
293341477Svmaffione		ret = EINVAL;
294341477Svmaffione		goto unlock_bdg_free;
295259412Sluigi	}
296259412Sluigi
297341477Svmaffione	b->bdg_flags &= ~(NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE);
298341477Svmaffione	ret = netmap_bdg_free(b);
299341477Svmaffione	if (ret) {
300341477Svmaffione		b->bdg_flags |= NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE;
301259412Sluigi	}
302259412Sluigi
303341477Svmaffioneunlock_bdg_free:
304341477Svmaffione	NMG_UNLOCK();
305341477Svmaffione	return ret;
306259412Sluigi}
307259412Sluigi
308342033Svmaffione/* Process NETMAP_REQ_VALE_LIST. */
309342033Svmaffioneint
310342033Svmaffionenetmap_vale_list(struct nmreq_header *hdr)
311342033Svmaffione{
312342033Svmaffione	struct nmreq_vale_list *req =
313342033Svmaffione		(struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
314342033Svmaffione	int namelen = strlen(hdr->nr_name);
315342033Svmaffione	struct nm_bridge *b, *bridges;
316342033Svmaffione	struct netmap_vp_adapter *vpna;
317342033Svmaffione	int error = 0, i, j;
318342033Svmaffione	u_int num_bridges;
319259412Sluigi
320342033Svmaffione	netmap_bns_getbridges(&bridges, &num_bridges);
321259412Sluigi
322342033Svmaffione	/* this is used to enumerate bridges and ports */
323342033Svmaffione	if (namelen) { /* look up indexes of bridge and port */
324342033Svmaffione		if (strncmp(hdr->nr_name, NM_BDG_NAME,
325342033Svmaffione					strlen(NM_BDG_NAME))) {
326342033Svmaffione			return EINVAL;
327342033Svmaffione		}
328342033Svmaffione		NMG_LOCK();
329342033Svmaffione		b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
330342033Svmaffione		if (!b) {
331342033Svmaffione			NMG_UNLOCK();
332342033Svmaffione			return ENOENT;
333342033Svmaffione		}
334342033Svmaffione
335342033Svmaffione		req->nr_bridge_idx = b - bridges; /* bridge index */
336342033Svmaffione		req->nr_port_idx = NM_BDG_NOPORT;
337342033Svmaffione		for (j = 0; j < b->bdg_active_ports; j++) {
338342033Svmaffione			i = b->bdg_port_index[j];
339342033Svmaffione			vpna = b->bdg_ports[i];
340342033Svmaffione			if (vpna == NULL) {
341342033Svmaffione				nm_prerr("This should not happen");
342342033Svmaffione				continue;
343342033Svmaffione			}
344342033Svmaffione			/* the former and the latter identify a
345342033Svmaffione			 * virtual port and a NIC, respectively
346342033Svmaffione			 */
347342033Svmaffione			if (!strcmp(vpna->up.name, hdr->nr_name)) {
348342033Svmaffione				req->nr_port_idx = i; /* port index */
349342033Svmaffione				break;
350342033Svmaffione			}
351342033Svmaffione		}
352342033Svmaffione		NMG_UNLOCK();
353342033Svmaffione	} else {
354342033Svmaffione		/* return the first non-empty entry starting from
355342033Svmaffione		 * bridge nr_arg1 and port nr_arg2.
356342033Svmaffione		 *
357342033Svmaffione		 * Users can detect the end of the same bridge by
358342033Svmaffione		 * seeing the new and old value of nr_arg1, and can
359342033Svmaffione		 * detect the end of all the bridge by error != 0
360342033Svmaffione		 */
361342033Svmaffione		i = req->nr_bridge_idx;
362342033Svmaffione		j = req->nr_port_idx;
363342033Svmaffione
364342033Svmaffione		NMG_LOCK();
365342033Svmaffione		for (error = ENOENT; i < NM_BRIDGES; i++) {
366342033Svmaffione			b = bridges + i;
367342033Svmaffione			for ( ; j < NM_BDG_MAXPORTS; j++) {
368342033Svmaffione				if (b->bdg_ports[j] == NULL)
369342033Svmaffione					continue;
370342033Svmaffione				vpna = b->bdg_ports[j];
371342033Svmaffione				/* write back the VALE switch name */
372342033Svmaffione				strlcpy(hdr->nr_name, vpna->up.name,
373342033Svmaffione					sizeof(hdr->nr_name));
374342033Svmaffione				error = 0;
375342033Svmaffione				goto out;
376342033Svmaffione			}
377342033Svmaffione			j = 0; /* following bridges scan from 0 */
378342033Svmaffione		}
379342033Svmaffione	out:
380342033Svmaffione		req->nr_bridge_idx = i;
381342033Svmaffione		req->nr_port_idx = j;
382342033Svmaffione		NMG_UNLOCK();
383342033Svmaffione	}
384342033Svmaffione
385342033Svmaffione	return error;
386342033Svmaffione}
387342033Svmaffione
388342033Svmaffione/* Process NETMAP_REQ_VALE_ATTACH.
389342033Svmaffione */
390342033Svmaffioneint
391342033Svmaffionenetmap_vale_attach(struct nmreq_header *hdr, void *auth_token)
392342033Svmaffione{
393342033Svmaffione	struct nmreq_vale_attach *req =
394342033Svmaffione		(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
395342033Svmaffione	struct netmap_vp_adapter * vpna;
396342033Svmaffione	struct netmap_adapter *na = NULL;
397342033Svmaffione	struct netmap_mem_d *nmd = NULL;
398342033Svmaffione	struct nm_bridge *b = NULL;
399342033Svmaffione	int error;
400342033Svmaffione
401342033Svmaffione	NMG_LOCK();
402342033Svmaffione	/* permission check for modified bridges */
403342033Svmaffione	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
404342033Svmaffione	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
405342033Svmaffione		error = EACCES;
406342033Svmaffione		goto unlock_exit;
407342033Svmaffione	}
408342033Svmaffione
409342033Svmaffione	if (req->reg.nr_mem_id) {
410342033Svmaffione		nmd = netmap_mem_find(req->reg.nr_mem_id);
411342033Svmaffione		if (nmd == NULL) {
412342033Svmaffione			error = EINVAL;
413342033Svmaffione			goto unlock_exit;
414342033Svmaffione		}
415342033Svmaffione	}
416342033Svmaffione
417342033Svmaffione	/* check for existing one */
418342033Svmaffione	error = netmap_get_vale_na(hdr, &na, nmd, 0);
419342033Svmaffione	if (na) {
420342033Svmaffione		error = EBUSY;
421342033Svmaffione		goto unref_exit;
422342033Svmaffione	}
423342033Svmaffione	error = netmap_get_vale_na(hdr, &na,
424342033Svmaffione				nmd, 1 /* create if not exists */);
425342033Svmaffione	if (error) { /* no device */
426342033Svmaffione		goto unlock_exit;
427342033Svmaffione	}
428342033Svmaffione
429342033Svmaffione	if (na == NULL) { /* VALE prefix missing */
430342033Svmaffione		error = EINVAL;
431342033Svmaffione		goto unlock_exit;
432342033Svmaffione	}
433342033Svmaffione
434342033Svmaffione	if (NETMAP_OWNED_BY_ANY(na)) {
435342033Svmaffione		error = EBUSY;
436342033Svmaffione		goto unref_exit;
437342033Svmaffione	}
438342033Svmaffione
439342033Svmaffione	if (na->nm_bdg_ctl) {
440342033Svmaffione		/* nop for VALE ports. The bwrap needs to put the hwna
441342033Svmaffione		 * in netmap mode (see netmap_bwrap_bdg_ctl)
442342033Svmaffione		 */
443342033Svmaffione		error = na->nm_bdg_ctl(hdr, na);
444342033Svmaffione		if (error)
445342033Svmaffione			goto unref_exit;
446344047Svmaffione		nm_prdis("registered %s to netmap-mode", na->name);
447342033Svmaffione	}
448342033Svmaffione	vpna = (struct netmap_vp_adapter *)na;
449342033Svmaffione	req->port_index = vpna->bdg_port;
450342033Svmaffione
451342033Svmaffione	if (nmd)
452342033Svmaffione		netmap_mem_put(nmd);
453342033Svmaffione
454342033Svmaffione	NMG_UNLOCK();
455342033Svmaffione	return 0;
456342033Svmaffione
457342033Svmaffioneunref_exit:
458342033Svmaffione	netmap_adapter_put(na);
459342033Svmaffioneunlock_exit:
460342033Svmaffione	if (nmd)
461342033Svmaffione		netmap_mem_put(nmd);
462342033Svmaffione
463342033Svmaffione	NMG_UNLOCK();
464342033Svmaffione	return error;
465342033Svmaffione}
466342033Svmaffione
467342033Svmaffione/* Process NETMAP_REQ_VALE_DETACH.
468342033Svmaffione */
469342033Svmaffioneint
470342033Svmaffionenetmap_vale_detach(struct nmreq_header *hdr, void *auth_token)
471342033Svmaffione{
472342033Svmaffione	struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
473342033Svmaffione	struct netmap_vp_adapter *vpna;
474342033Svmaffione	struct netmap_adapter *na;
475342033Svmaffione	struct nm_bridge *b = NULL;
476342033Svmaffione	int error;
477342033Svmaffione
478342033Svmaffione	NMG_LOCK();
479342033Svmaffione	/* permission check for modified bridges */
480342033Svmaffione	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
481342033Svmaffione	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
482342033Svmaffione		error = EACCES;
483342033Svmaffione		goto unlock_exit;
484342033Svmaffione	}
485342033Svmaffione
486342033Svmaffione	error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
487342033Svmaffione	if (error) { /* no device, or another bridge or user owns the device */
488342033Svmaffione		goto unlock_exit;
489342033Svmaffione	}
490342033Svmaffione
491342033Svmaffione	if (na == NULL) { /* VALE prefix missing */
492342033Svmaffione		error = EINVAL;
493342033Svmaffione		goto unlock_exit;
494342033Svmaffione	} else if (nm_is_bwrap(na) &&
495342033Svmaffione		   ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
496342033Svmaffione		/* Don't detach a NIC with polling */
497342033Svmaffione		error = EBUSY;
498342033Svmaffione		goto unref_exit;
499342033Svmaffione	}
500342033Svmaffione
501342033Svmaffione	vpna = (struct netmap_vp_adapter *)na;
502342033Svmaffione	if (na->na_vp != vpna) {
503342033Svmaffione		/* trying to detach first attach of VALE persistent port attached
504342033Svmaffione		 * to 2 bridges
505342033Svmaffione		 */
506342033Svmaffione		error = EBUSY;
507342033Svmaffione		goto unref_exit;
508342033Svmaffione	}
509342033Svmaffione	nmreq_det->port_index = vpna->bdg_port;
510342033Svmaffione
511342033Svmaffione	if (na->nm_bdg_ctl) {
512342033Svmaffione		/* remove the port from bridge. The bwrap
513342033Svmaffione		 * also needs to put the hwna in normal mode
514342033Svmaffione		 */
515342033Svmaffione		error = na->nm_bdg_ctl(hdr, na);
516342033Svmaffione	}
517342033Svmaffione
518342033Svmaffioneunref_exit:
519342033Svmaffione	netmap_adapter_put(na);
520342033Svmaffioneunlock_exit:
521342033Svmaffione	NMG_UNLOCK();
522342033Svmaffione	return error;
523342033Svmaffione
524342033Svmaffione}
525342033Svmaffione
526342033Svmaffione
527341477Svmaffione/* nm_dtor callback for ephemeral VALE ports */
528341477Svmaffionestatic void
529342033Svmaffionenetmap_vale_vp_dtor(struct netmap_adapter *na)
530259412Sluigi{
531341477Svmaffione	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
532341477Svmaffione	struct nm_bridge *b = vpna->na_bdg;
533259412Sluigi
534344047Svmaffione	nm_prdis("%s has %d references", na->name, na->na_refcount);
535260700Sluigi
536341477Svmaffione	if (b) {
537341477Svmaffione		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
538259412Sluigi	}
539260368Sluigi
540341477Svmaffione	if (na->ifp != NULL && !nm_iszombie(na)) {
541341477Svmaffione		NM_DETACH_NA(na->ifp);
542341477Svmaffione		if (vpna->autodelete) {
543344047Svmaffione			nm_prdis("releasing %s", na->ifp->if_xname);
544341477Svmaffione			NMG_UNLOCK();
545341477Svmaffione			nm_os_vi_detach(na->ifp);
546341477Svmaffione			NMG_LOCK();
547341477Svmaffione		}
548259412Sluigi	}
549259412Sluigi}
550259412Sluigi
551259412Sluigi
552270063Sluigi
553270063Sluigi/* nm_krings_create callback for VALE ports.
554270063Sluigi * Calls the standard netmap_krings_create, then adds leases on rx
555270063Sluigi * rings and bdgfwd on tx rings.
556270063Sluigi */
557259412Sluigistatic int
558342033Svmaffionenetmap_vale_vp_krings_create(struct netmap_adapter *na)
559259412Sluigi{
560261909Sluigi	u_int tailroom;
561259412Sluigi	int error, i;
562259412Sluigi	uint32_t *leases;
563285349Sluigi	u_int nrx = netmap_real_rings(na, NR_RX);
564259412Sluigi
565259412Sluigi	/*
566259412Sluigi	 * Leases are attached to RX rings on vale ports
567259412Sluigi	 */
568259412Sluigi	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
569259412Sluigi
570261909Sluigi	error = netmap_krings_create(na, tailroom);
571259412Sluigi	if (error)
572259412Sluigi		return error;
573259412Sluigi
574259412Sluigi	leases = na->tailroom;
575259412Sluigi
576259412Sluigi	for (i = 0; i < nrx; i++) { /* Receive rings */
577341477Svmaffione		na->rx_rings[i]->nkr_leases = leases;
578259412Sluigi		leases += na->num_rx_desc;
579259412Sluigi	}
580259412Sluigi
581259412Sluigi	error = nm_alloc_bdgfwd(na);
582259412Sluigi	if (error) {
583259412Sluigi		netmap_krings_delete(na);
584259412Sluigi		return error;
585259412Sluigi	}
586259412Sluigi
587259412Sluigi	return 0;
588259412Sluigi}
589259412Sluigi
590260368Sluigi
/*
 * nm_krings_delete callback for VALE ports.
 * Releases the forwarding tables of the tx rings first, then the
 * krings themselves.
 */
static void
netmap_vale_vp_krings_delete(struct netmap_adapter *na)
{
	/* the tables hang off the krings, so they must go first */
	nm_free_bdgfwd(na);

	netmap_krings_delete(na);
}
598259412Sluigi
599259412Sluigi
600259412Sluigistatic int
601342033Svmaffionenm_vale_flush(struct nm_bdg_fwd *ft, u_int n,
602259412Sluigi	struct netmap_vp_adapter *na, u_int ring_nr);
603259412Sluigi
604259412Sluigi
605259412Sluigi/*
606270063Sluigi * main dispatch routine for the bridge.
607259412Sluigi * Grab packets from a kring, move them into the ft structure
608259412Sluigi * associated to the tx (input) port. Max one instance per port,
609259412Sluigi * filtered on input (ioctl, poll or XXX).
610259412Sluigi * Returns the next position in the ring.
611259412Sluigi */
612259412Sluigistatic int
613342033Svmaffionenm_vale_preflush(struct netmap_kring *kring, u_int end)
614259412Sluigi{
615270063Sluigi	struct netmap_vp_adapter *na =
616270063Sluigi		(struct netmap_vp_adapter*)kring->na;
617259412Sluigi	struct netmap_ring *ring = kring->ring;
618259412Sluigi	struct nm_bdg_fwd *ft;
619270063Sluigi	u_int ring_nr = kring->ring_id;
620259412Sluigi	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
621259412Sluigi	u_int ft_i = 0;	/* start from 0 */
622259412Sluigi	u_int frags = 1; /* how many frags ? */
623259412Sluigi	struct nm_bridge *b = na->na_bdg;
624259412Sluigi
625259412Sluigi	/* To protect against modifications to the bridge we acquire a
626259412Sluigi	 * shared lock, waiting if we can sleep (if the source port is
627259412Sluigi	 * attached to a user process) or with a trylock otherwise (NICs).
628259412Sluigi	 */
629344047Svmaffione	nm_prdis("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
630259412Sluigi	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
631259412Sluigi		BDG_RLOCK(b);
632259412Sluigi	else if (!BDG_RTRYLOCK(b))
633341477Svmaffione		return j;
634344047Svmaffione	nm_prdis(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
635259412Sluigi	ft = kring->nkr_ft;
636259412Sluigi
637259412Sluigi	for (; likely(j != end); j = nm_next(j, lim)) {
638259412Sluigi		struct netmap_slot *slot = &ring->slot[j];
639259412Sluigi		char *buf;
640259412Sluigi
641259412Sluigi		ft[ft_i].ft_len = slot->len;
642259412Sluigi		ft[ft_i].ft_flags = slot->flags;
643341477Svmaffione		ft[ft_i].ft_offset = 0;
644259412Sluigi
645344047Svmaffione		nm_prdis("flags is 0x%x", slot->flags);
646285349Sluigi		/* we do not use the buf changed flag, but we still need to reset it */
647285349Sluigi		slot->flags &= ~NS_BUF_CHANGED;
648285349Sluigi
649259412Sluigi		/* this slot goes into a list so initialize the link field */
650259412Sluigi		ft[ft_i].ft_next = NM_FT_NULL;
651259412Sluigi		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
652270063Sluigi			(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
653267151Sluigi		if (unlikely(buf == NULL)) {
654342033Svmaffione			nm_prlim(5, "NULL %s buffer pointer from %s slot %d len %d",
655267151Sluigi				(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
656267151Sluigi				kring->name, j, ft[ft_i].ft_len);
657270063Sluigi			buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
658267151Sluigi			ft[ft_i].ft_len = 0;
659267151Sluigi			ft[ft_i].ft_flags = 0;
660267151Sluigi		}
661259487Sluigi		__builtin_prefetch(buf);
662259412Sluigi		++ft_i;
663259412Sluigi		if (slot->flags & NS_MOREFRAG) {
664259412Sluigi			frags++;
665259412Sluigi			continue;
666259412Sluigi		}
667259412Sluigi		if (unlikely(netmap_verbose && frags > 1))
668344047Svmaffione			nm_prlim(5, "%d frags at %d", frags, ft_i - frags);
669259412Sluigi		ft[ft_i - frags].ft_frags = frags;
670259412Sluigi		frags = 1;
671259412Sluigi		if (unlikely((int)ft_i >= bridge_batch))
672342033Svmaffione			ft_i = nm_vale_flush(ft, ft_i, na, ring_nr);
673259412Sluigi	}
674259412Sluigi	if (frags > 1) {
675341477Svmaffione		/* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we
676341477Svmaffione		 * have to fix frags count. */
677341477Svmaffione		frags--;
678341477Svmaffione		ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
679341477Svmaffione		ft[ft_i - frags].ft_frags = frags;
680342033Svmaffione		nm_prlim(5, "Truncate incomplete fragment at %d (%d frags)", ft_i, frags);
681259412Sluigi	}
682259412Sluigi	if (ft_i)
683342033Svmaffione		ft_i = nm_vale_flush(ft, ft_i, na, ring_nr);
684259412Sluigi	BDG_RUNLOCK(b);
685259412Sluigi	return j;
686259412Sluigi}
687259412Sluigi
688259412Sluigi
/* ----- FreeBSD if_bridge hash function ------- */

/*
 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
 *
 * http://www.burtleburtle.net/bob/hash/spooky.html
 */
#define mix(a, b, c)                                                    \
do {                                                                    \
	a -= b; a -= c; a ^= (c >> 13);                                 \
	b -= c; b -= a; b ^= (a << 8);                                  \
	c -= a; c -= b; c ^= (b >> 13);                                 \
	a -= b; a -= c; a ^= (c >> 12);                                 \
	b -= c; b -= a; b ^= (a << 16);                                 \
	c -= a; c -= b; c ^= (b >> 5);                                  \
	a -= b; a -= c; a ^= (c >> 3);                                  \
	b -= c; b -= a; b ^= (a << 10);                                 \
	c -= a; c -= b; c ^= (b >> 15);                                 \
} while (/*CONSTCOND*/0)


/*
 * Hash the six bytes at addr (an Ethernet MAC address) into an index
 * in the range [0, NM_BDG_HASH - 1].
 *
 * Each byte is widened to uint32_t before being shifted: a plain
 * uint8_t operand is promoted to int, and shifting a value >= 0x80
 * left by 24 would overflow it.
 */
static __inline uint32_t
nm_vale_rthash(const uint8_t *addr)
{
	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hash key

	b += (uint32_t)addr[5] << 8;
	b += (uint32_t)addr[4];
	a += (uint32_t)addr[3] << 24;
	a += (uint32_t)addr[2] << 16;
	a += (uint32_t)addr[1] << 8;
	a += (uint32_t)addr[0];

	mix(a, b, c);
#define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
	return (c & BRIDGE_RTHASH_MASK);
}

#undef mix
729259412Sluigi
730259412Sluigi
731259412Sluigi/*
732259412Sluigi * Lookup function for a learning bridge.
733259412Sluigi * Update the hash table with the source address,
734259412Sluigi * and then returns the destination port index, and the
735259412Sluigi * ring in *dst_ring (at the moment, always use ring 0)
736259412Sluigi */
737341477Svmaffioneuint32_t
738342033Svmaffionenetmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
739341477Svmaffione		struct netmap_vp_adapter *na, void *private_data)
740259412Sluigi{
741341477Svmaffione	uint8_t *buf = ((uint8_t *)ft->ft_buf) + ft->ft_offset;
742341477Svmaffione	u_int buf_len = ft->ft_len - ft->ft_offset;
743341477Svmaffione	struct nm_hash_ent *ht = private_data;
744259412Sluigi	uint32_t sh, dh;
745259412Sluigi	u_int dst, mysrc = na->bdg_port;
746259412Sluigi	uint64_t smac, dmac;
747341477Svmaffione	uint8_t indbuf[12];
748259412Sluigi
749341477Svmaffione	if (buf_len < 14) {
750259412Sluigi		return NM_BDG_NOPORT;
751259412Sluigi	}
752341477Svmaffione
753341477Svmaffione	if (ft->ft_flags & NS_INDIRECT) {
754341477Svmaffione		if (copyin(buf, indbuf, sizeof(indbuf))) {
755341477Svmaffione			return NM_BDG_NOPORT;
756341477Svmaffione		}
757341477Svmaffione		buf = indbuf;
758341477Svmaffione	}
759341477Svmaffione
760259412Sluigi	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
761259412Sluigi	smac = le64toh(*(uint64_t *)(buf + 4));
762259412Sluigi	smac >>= 16;
763259412Sluigi
764259412Sluigi	/*
765259412Sluigi	 * The hash is somewhat expensive, there might be some
766259412Sluigi	 * worthwhile optimizations here.
767259412Sluigi	 */
768285349Sluigi	if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
769259412Sluigi		uint8_t *s = buf+6;
770342033Svmaffione		sh = nm_vale_rthash(s); /* hash of source */
771259412Sluigi		/* update source port forwarding entry */
772285349Sluigi		na->last_smac = ht[sh].mac = smac;	/* XXX expire ? */
773259412Sluigi		ht[sh].ports = mysrc;
774342033Svmaffione		if (netmap_debug & NM_DEBUG_VALE)
775342033Svmaffione		    nm_prinf("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
776259412Sluigi			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
777259412Sluigi	}
778259412Sluigi	dst = NM_BDG_BROADCAST;
779259412Sluigi	if ((buf[0] & 1) == 0) { /* unicast */
780342033Svmaffione		dh = nm_vale_rthash(buf); /* hash of dst */
781259412Sluigi		if (ht[dh].mac == dmac) {	/* found dst */
782259412Sluigi			dst = ht[dh].ports;
783259412Sluigi		}
784259412Sluigi	}
785259412Sluigi	return dst;
786259412Sluigi}
787259412Sluigi
788259412Sluigi
789259412Sluigi/*
790260368Sluigi * Available space in the ring. Only used in VALE code
791260368Sluigi * and only with is_rx = 1
792260368Sluigi */
793260368Sluigistatic inline uint32_t
794260368Sluiginm_kr_space(struct netmap_kring *k, int is_rx)
795260368Sluigi{
796260368Sluigi	int space;
797260368Sluigi
798260368Sluigi	if (is_rx) {
799260368Sluigi		int busy = k->nkr_hwlease - k->nr_hwcur;
800260368Sluigi		if (busy < 0)
801260368Sluigi			busy += k->nkr_num_slots;
802260368Sluigi		space = k->nkr_num_slots - 1 - busy;
803260368Sluigi	} else {
804260368Sluigi		/* XXX never used in this branch */
805260368Sluigi		space = k->nr_hwtail - k->nkr_hwlease;
806260368Sluigi		if (space < 0)
807260368Sluigi			space += k->nkr_num_slots;
808260368Sluigi	}
809260368Sluigi#if 0
810260368Sluigi	// sanity check
811260368Sluigi	if (k->nkr_hwlease >= k->nkr_num_slots ||
812260368Sluigi		k->nr_hwcur >= k->nkr_num_slots ||
813260368Sluigi		k->nr_tail >= k->nkr_num_slots ||
814260368Sluigi		busy < 0 ||
815260368Sluigi		busy >= k->nkr_num_slots) {
816344047Svmaffione		nm_prerr("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",
817344047Svmaffione		    k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
818344047Svmaffione		    k->nkr_lease_idx, k->nkr_num_slots);
819260368Sluigi	}
820260368Sluigi#endif
821260368Sluigi	return space;
822260368Sluigi}
823260368Sluigi
824260368Sluigi
825260368Sluigi
826260368Sluigi
827260368Sluigi/* make a lease on the kring for N positions. return the
828260368Sluigi * lease index
829260368Sluigi * XXX only used in VALE code and with is_rx = 1
830260368Sluigi */
831260368Sluigistatic inline uint32_t
832260368Sluiginm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
833260368Sluigi{
834260368Sluigi	uint32_t lim = k->nkr_num_slots - 1;
835260368Sluigi	uint32_t lease_idx = k->nkr_lease_idx;
836260368Sluigi
837260368Sluigi	k->nkr_leases[lease_idx] = NR_NOSLOT;
838260368Sluigi	k->nkr_lease_idx = nm_next(lease_idx, lim);
839260368Sluigi
840342033Svmaffione#ifdef CONFIG_NETMAP_DEBUG
841260368Sluigi	if (n > nm_kr_space(k, is_rx)) {
842342033Svmaffione		nm_prerr("invalid request for %d slots", n);
843260368Sluigi		panic("x");
844260368Sluigi	}
845342033Svmaffione#endif /* CONFIG NETMAP_DEBUG */
846260368Sluigi	/* XXX verify that there are n slots */
847260368Sluigi	k->nkr_hwlease += n;
848260368Sluigi	if (k->nkr_hwlease > lim)
849260368Sluigi		k->nkr_hwlease -= lim + 1;
850260368Sluigi
851342033Svmaffione#ifdef CONFIG_NETMAP_DEBUG
852260368Sluigi	if (k->nkr_hwlease >= k->nkr_num_slots ||
853260368Sluigi		k->nr_hwcur >= k->nkr_num_slots ||
854260368Sluigi		k->nr_hwtail >= k->nkr_num_slots ||
855260368Sluigi		k->nkr_lease_idx >= k->nkr_num_slots) {
856342033Svmaffione		nm_prerr("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
857270063Sluigi			k->na->name,
858260368Sluigi			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
859260368Sluigi			k->nkr_lease_idx, k->nkr_num_slots);
860260368Sluigi	}
861342033Svmaffione#endif /* CONFIG_NETMAP_DEBUG */
862260368Sluigi	return lease_idx;
863260368Sluigi}
864260368Sluigi
865260368Sluigi/*
 * Forward a batch of packets from a source port to their destinations.
 *
 *   ft       work area, one entry per fragment; entries [0, n) are
 *            examined. The scratch pad described below follows it.
 *   n        number of entries of ft to process
 *   na       source port
 *   ring_nr  default destination ring passed to the lookup function
 *            ("same ring as origin")
866270063Sluigi *
867259412Sluigi * This flush routine supports only unicast and broadcast but a large
868259412Sluigi * number of ports, and lets us replace the learn and dispatch functions.
 *
 * The first pass calls the bridge lookup function for every packet and
 * chains the packet on a per-destination queue. The second pass visits
 * every destination that has something queued: it leases slots on the
 * destination rx kring under q_lock, copies the packets without the
 * lock, then takes the lock again to report completion and, when
 * possible, advance nr_hwtail and notify the destination.
 *
 * Always returns 0; packets that cannot be delivered are dropped.
869259412Sluigi */
870259412Sluigiint
871342033Svmaffionenm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
872259412Sluigi		u_int ring_nr)
873259412Sluigi{
874342033Svmaffione	struct nm_vale_q *dst_ents, *brddst;
875259412Sluigi	uint16_t num_dsts = 0, *dsts;
876259412Sluigi	struct nm_bridge *b = na->na_bdg;
877341477Svmaffione	u_int i, me = na->bdg_port;
878259412Sluigi
879259412Sluigi	/*
880259412Sluigi	 * The work area (pointed by ft) is followed by an array of
881259412Sluigi	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
882259412Sluigi	 * queues per port plus one for the broadcast traffic.
883259412Sluigi	 * Then we have an array of destination indexes.
884259412Sluigi	 */
885342033Svmaffione	dst_ents = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX);
886259412Sluigi	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
887259412Sluigi
888259412Sluigi	/* first pass: find a destination for each packet in the batch */
	/* i moves from packet to packet: ft[i].ft_frags entries are skipped
	 * at every iteration, including the ones ended by 'continue'.
	 */
889259412Sluigi	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
890259412Sluigi		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
891259412Sluigi		uint16_t dst_port, d_i;
892342033Svmaffione		struct nm_vale_q *d;
893341477Svmaffione		struct nm_bdg_fwd *start_ft = NULL;
894259412Sluigi
895344047Svmaffione		nm_prdis("slot %d frags %d", i, ft[i].ft_frags);
896341477Svmaffione
		/*
		 * Choose the entry handed to the lookup function. The first
		 * na->up.virt_hdr_len bytes are skipped: they are either a
		 * prefix of the first fragment (ft_offset tells the lookup
		 * where to start) or exactly the whole first fragment, in
		 * which case the lookup starts from the second one.
		 */
897341477Svmaffione		if (na->up.virt_hdr_len < ft[i].ft_len) {
898341477Svmaffione			ft[i].ft_offset = na->up.virt_hdr_len;
899341477Svmaffione			start_ft = &ft[i];
900341477Svmaffione		} else if (na->up.virt_hdr_len == ft[i].ft_len && ft[i].ft_flags & NS_MOREFRAG) {
901341477Svmaffione			ft[i].ft_offset = ft[i].ft_len;
902341477Svmaffione			start_ft = &ft[i+1];
903341477Svmaffione		} else {
904341477Svmaffione			/* Drop the packet if the virtio-net header is not into the first
905341477Svmaffione			 * fragment nor at the very beginning of the second.
906341477Svmaffione			 */
907259412Sluigi			continue;
908341477Svmaffione		}
909342033Svmaffione		dst_port = b->bdg_ops.lookup(start_ft, &dst_ring, na, b->private_data);
910259412Sluigi		if (netmap_verbose > 255)
911344047Svmaffione			nm_prlim(5, "slot %d port %d -> %d", i, me, dst_port);
912341477Svmaffione		if (dst_port >= NM_BDG_NOPORT)
913259412Sluigi			continue; /* this packet is identified to be dropped */
914259412Sluigi		else if (dst_port == NM_BDG_BROADCAST)
915259412Sluigi			dst_ring = 0; /* broadcasts always go to ring 0 */
		/* never send a packet back to its source port, nor to a
		 * port index that has no adapter attached
		 */
916259412Sluigi		else if (unlikely(dst_port == me ||
917259412Sluigi		    !b->bdg_ports[dst_port]))
918259412Sluigi			continue;
919259412Sluigi
920259412Sluigi		/* get a position in the scratch pad */
921259412Sluigi		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
922259412Sluigi		d = dst_ents + d_i;
923259412Sluigi
924259412Sluigi		/* append the first fragment to the list */
925259412Sluigi		if (d->bq_head == NM_FT_NULL) { /* new destination */
926259412Sluigi			d->bq_head = d->bq_tail = i;
927259412Sluigi			/* remember this position to be scanned later */
928259412Sluigi			if (dst_port != NM_BDG_BROADCAST)
929259412Sluigi				dsts[num_dsts++] = d_i;
930259412Sluigi		} else {
931259412Sluigi			ft[d->bq_tail].ft_next = i;
932259412Sluigi			d->bq_tail = i;
933259412Sluigi		}
		/* the queue length counts slots (fragments), not packets */
934259412Sluigi		d->bq_len += ft[i].ft_frags;
935259412Sluigi	}
936259412Sluigi
937259412Sluigi	/*
938259412Sluigi	 * Broadcast traffic goes to ring 0 on all destinations.
939259412Sluigi	 * So we need to add these rings to the list of ports to scan.
940259412Sluigi	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
941259412Sluigi	 * expensive. We should keep a compact list of active destinations
942259412Sluigi	 * so we could shorten this loop.
943259412Sluigi	 */
944259412Sluigi	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
945259412Sluigi	if (brddst->bq_head != NM_FT_NULL) {
946341477Svmaffione		u_int j;
947259412Sluigi		for (j = 0; likely(j < b->bdg_active_ports); j++) {
948259412Sluigi			uint16_t d_i;
949259412Sluigi			i = b->bdg_port_index[j];
950259412Sluigi			if (unlikely(i == me))
951259412Sluigi				continue;
952259412Sluigi			d_i = i * NM_BDG_MAXRINGS;
			/* ring 0 of this port is already in dsts[] if it
			 * has unicast traffic queued; add it only otherwise
			 */
953259412Sluigi			if (dst_ents[d_i].bq_head == NM_FT_NULL)
954259412Sluigi				dsts[num_dsts++] = d_i;
955259412Sluigi		}
956259412Sluigi	}
957259412Sluigi
958344047Svmaffione	nm_prdis(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
959270063Sluigi	/* second pass: scan destinations */
960259412Sluigi	for (i = 0; i < num_dsts; i++) {
961259412Sluigi		struct netmap_vp_adapter *dst_na;
962259412Sluigi		struct netmap_kring *kring;
963259412Sluigi		struct netmap_ring *ring;
964261909Sluigi		u_int dst_nr, lim, j, d_i, next, brd_next;
965259412Sluigi		u_int needed, howmany;
966259412Sluigi		int retry = netmap_txsync_retry;
967342033Svmaffione		struct nm_vale_q *d;
968259412Sluigi		uint32_t my_start = 0, lease_idx = 0;
969259412Sluigi		int nrings;
970261909Sluigi		int virt_hdr_mismatch = 0;
971259412Sluigi
972259412Sluigi		d_i = dsts[i];
973344047Svmaffione		nm_prdis("second pass %d port %d", i, d_i);
974259412Sluigi		d = dst_ents + d_i;
975259412Sluigi		// XXX fix the division
976259412Sluigi		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
977259412Sluigi		/* protect from the lookup function returning an inactive
978259412Sluigi		 * destination port
979259412Sluigi		 */
980259412Sluigi		if (unlikely(dst_na == NULL))
981259412Sluigi			goto cleanup;
982259412Sluigi		if (dst_na->up.na_flags & NAF_SW_ONLY)
983259412Sluigi			goto cleanup;
984259412Sluigi		/*
985259412Sluigi		 * The interface may be in !netmap mode in two cases:
986259412Sluigi		 * - when na is attached but not activated yet;
987259412Sluigi		 * - when na is being deactivated but is still attached.
988259412Sluigi		 */
989270063Sluigi		if (unlikely(!nm_netmap_on(&dst_na->up))) {
990344047Svmaffione			nm_prdis("not in netmap mode!");
991259412Sluigi			goto cleanup;
992259412Sluigi		}
993259412Sluigi
994259412Sluigi		/* there is at least one either unicast or broadcast packet */
995259412Sluigi		brd_next = brddst->bq_head;
996259412Sluigi		next = d->bq_head;
997259412Sluigi		/* we need to reserve this many slots. If fewer are
998259412Sluigi		 * available, some packets will be dropped.
999259412Sluigi		 * Packets may have multiple fragments, so
1000259412Sluigi		 * there is a chance that we may not use all of the slots
1001259412Sluigi		 * we have claimed, so we will need to handle the leftover
1002259412Sluigi		 * ones when we regain the lock.
1003259412Sluigi		 */
1004259412Sluigi		needed = d->bq_len + brddst->bq_len;
1005259412Sluigi
1006341477Svmaffione		if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) {
1007341477Svmaffione			if (netmap_verbose) {
1008344047Svmaffione				nm_prlim(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
1009341477Svmaffione						dst_na->up.virt_hdr_len);
1010341477Svmaffione			}
1011261909Sluigi			/* There is a virtio-net header/offloadings mismatch between
1012261909Sluigi			 * source and destination. The slower mismatch datapath will
1013261909Sluigi			 * be used to cope with all the mismatches.
1014261909Sluigi			 */
1015261909Sluigi			virt_hdr_mismatch = 1;
1016261909Sluigi			if (dst_na->mfs < na->mfs) {
1017261909Sluigi				/* We may need to do segmentation offloadings, and so
1018261909Sluigi				 * we may need a number of destination slots greater
1019261909Sluigi				 * than the number of input slots ('needed').
1020261909Sluigi				 * We look for the smallest integer 'x' which satisfies:
1021261909Sluigi				 *	needed * na->mfs + x * H <= x * na->mfs
1022261909Sluigi				 * where 'H' is the length of the longest header that may
1023261909Sluigi				 * be replicated in the segmentation process (e.g. for
1024261909Sluigi				 * TCPv4 we must account for ethernet header, IP header
1025261909Sluigi				 * and TCPv4 header).
				 *
				 * NOTE(review): the assertion below only checks
				 * dst_na->mfs > 0, while the divisor is
				 * dst_na->mfs - WORST_CASE_GSO_HEADER; presumably
				 * mfs is always larger than that constant -
				 * confirm.
1026261909Sluigi				 */
1027341477Svmaffione				KASSERT(dst_na->mfs > 0, ("vpna->mfs is 0"));
1028261909Sluigi				needed = (needed * na->mfs) /
1029261909Sluigi						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
1030344047Svmaffione				nm_prdis(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
1031261909Sluigi			}
1032261909Sluigi		}
1033261909Sluigi
		/* NOTE(review): 'is_vp' is not declared in this function;
		 * presumably nm_prdis() discards its arguments - confirm
		 * before turning this message into a real print.
		 */
1034344047Svmaffione		nm_prdis(5, "pass 2 dst %d is %x %s",
1035259412Sluigi			i, d_i, is_vp ? "virtual" : "nic/host");
		/* ring part of the scratch pad index, which was built as
		 * port * NM_BDG_MAXRINGS + ring.
		 * NOTE(review): the mask assumes NM_BDG_MAXRINGS is a power
		 * of two - confirm.
		 */
1036259412Sluigi		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1037259412Sluigi		nrings = dst_na->up.num_rx_rings;
		/* fold the ring number onto the rx rings the destination has */
1038259412Sluigi		if (dst_nr >= nrings)
1039259412Sluigi			dst_nr = dst_nr % nrings;
1040341477Svmaffione		kring = dst_na->up.rx_rings[dst_nr];
1041259412Sluigi		ring = kring->ring;
1042341477Svmaffione		/* the destination ring may have not been opened for RX */
1043341477Svmaffione		if (unlikely(ring == NULL || kring->nr_mode != NKR_NETMAP_ON))
1044341477Svmaffione			goto cleanup;
1045259412Sluigi		lim = kring->nkr_num_slots - 1;
1046259412Sluigi
1047259412Sluigiretry:
1048259412Sluigi
1049261909Sluigi		if (dst_na->retry && retry) {
1050261909Sluigi			/* try to get some free slot from the previous run */
1051342033Svmaffione			kring->nm_notify(kring, NAF_FORCE_RECLAIM);
1052270063Sluigi			/* actually useful only for bwraps, since there
1053270063Sluigi			 * the notify will trigger a txsync on the hwna. VALE ports
1054270063Sluigi			 * have dst_na->retry == 0
1055270063Sluigi			 */
1056261909Sluigi		}
1057259412Sluigi		/* reserve the buffers in the queue and an entry
1058259412Sluigi		 * to report completion, and drop lock.
1059259412Sluigi		 * XXX this might become a helper function.
1060259412Sluigi		 */
1061259412Sluigi		mtx_lock(&kring->q_lock);
1062259412Sluigi		if (kring->nkr_stopped) {
1063259412Sluigi			mtx_unlock(&kring->q_lock);
1064259412Sluigi			goto cleanup;
1065259412Sluigi		}
		/* my_start remembers where our lease begins, j is the slot
		 * we write next
		 */
1066259412Sluigi		my_start = j = kring->nkr_hwlease;
1067259412Sluigi		howmany = nm_kr_space(kring, 1);
1068259412Sluigi		if (needed < howmany)
1069259412Sluigi			howmany = needed;
1070259412Sluigi		lease_idx = nm_kr_lease(kring, howmany, 1);
1071259412Sluigi		mtx_unlock(&kring->q_lock);
1072259412Sluigi
1073259412Sluigi		/* only retry if we need more than available slots */
1074259412Sluigi		if (retry && needed <= howmany)
1075259412Sluigi			retry = 0;
1076259412Sluigi
1077259412Sluigi		/* copy to the destination queue */
1078259412Sluigi		while (howmany > 0) {
1079259412Sluigi			struct netmap_slot *slot;
1080259412Sluigi			struct nm_bdg_fwd *ft_p, *ft_end;
1081259412Sluigi			u_int cnt;
1082259412Sluigi
1083259412Sluigi			/* find the queue from which we pick next packet.
1084259412Sluigi			 * NM_FT_NULL is always higher than valid indexes
1085259412Sluigi			 * so we never dereference it if the other list
1086259412Sluigi			 * has packets (and if both are empty we never
1087259412Sluigi			 * get here).
1088259412Sluigi			 */
1089259412Sluigi			if (next < brd_next) {
1090259412Sluigi				ft_p = ft + next;
1091259412Sluigi				next = ft_p->ft_next;
1092259412Sluigi			} else { /* insert broadcast */
1093259412Sluigi				ft_p = ft + brd_next;
1094259412Sluigi				brd_next = ft_p->ft_next;
1095259412Sluigi			}
1096259412Sluigi			cnt = ft_p->ft_frags; // cnt > 0
1097259412Sluigi			if (unlikely(cnt > howmany))
1098259412Sluigi			    break; /* no more space */
1099259412Sluigi			if (netmap_verbose && cnt > 1)
1100344047Svmaffione				nm_prlim(5, "rx %d frags to %d", cnt, j);
1101259412Sluigi			ft_end = ft_p + cnt;
1102261909Sluigi			if (unlikely(virt_hdr_mismatch)) {
				/* j and howmany are passed by address, so
				 * the helper can update them itself
				 */
1103261909Sluigi				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
1104261909Sluigi			} else {
1105261909Sluigi				howmany -= cnt;
				/* one destination slot per fragment */
1106261909Sluigi				do {
1107261909Sluigi					char *dst, *src = ft_p->ft_buf;
1108261909Sluigi					size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1109259412Sluigi
1110261909Sluigi					slot = &ring->slot[j];
1111270063Sluigi					dst = NMB(&dst_na->up, slot);
1112259412Sluigi
					/* NOTE(review): 'dst_ifp' is not declared in
					 * this function; presumably nm_prdis()
					 * discards its arguments - confirm.
					 */
1113344047Svmaffione					nm_prdis("send [%d] %d(%d) bytes at %s:%d",
1114261909Sluigi							i, (int)copy_len, (int)dst_len,
1115261909Sluigi							NM_IFPNAME(dst_ifp), j);
1116261909Sluigi					/* round to a multiple of 64 */
1117261909Sluigi					copy_len = (copy_len + 63) & ~63;
1118260368Sluigi
					/* the rounded length must fit in a buffer of
					 * both the source and the destination port
					 */
1119270063Sluigi					if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
1120270063Sluigi						     copy_len > NETMAP_BUF_SIZE(&na->up))) {
1121344047Svmaffione						nm_prlim(5, "invalid len %d, down to 64", (int)copy_len);
1122267151Sluigi						copy_len = dst_len = 64; // XXX
1123267151Sluigi					}
1124261909Sluigi					if (ft_p->ft_flags & NS_INDIRECT) {
1125261909Sluigi						if (copyin(src, dst, copy_len)) {
1126261909Sluigi							// invalid user pointer, pretend len is 0
1127261909Sluigi							dst_len = 0;
1128261909Sluigi						}
1129261909Sluigi					} else {
1130261909Sluigi						//memcpy(dst, src, copy_len);
1131261909Sluigi						pkt_copy(src, dst, (int)copy_len);
1132261909Sluigi					}
					/* the slot gets the real length, not the
					 * rounded one; the fragment count is stored
					 * shifted left by 8 next to NS_MOREFRAG
					 */
1133261909Sluigi					slot->len = dst_len;
1134261909Sluigi					slot->flags = (cnt << 8)| NS_MOREFRAG;
1135261909Sluigi					j = nm_next(j, lim);
1136261909Sluigi					needed--;
1137261909Sluigi					ft_p++;
1138261909Sluigi				} while (ft_p != ft_end);
1139261909Sluigi				slot->flags = (cnt << 8); /* clear flag on last entry */
1140261909Sluigi			}
1141259412Sluigi			/* are we done ? */
1142259412Sluigi			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1143259412Sluigi				break;
1144259412Sluigi		}
1145259412Sluigi		{
1146259412Sluigi		    /* current position */
1147259412Sluigi		    uint32_t *p = kring->nkr_leases; /* shorthand */
1148259412Sluigi		    uint32_t update_pos;
1149259412Sluigi		    int still_locked = 1;
1150259412Sluigi
1151259412Sluigi		    mtx_lock(&kring->q_lock);
1152259412Sluigi		    if (unlikely(howmany > 0)) {
1153259412Sluigi			/* not used all bufs. If i am the last one
1154259412Sluigi			 * i can recover the slots, otherwise must
1155259412Sluigi			 * fill them with 0 to mark empty packets.
1156259412Sluigi			 */
1157344047Svmaffione			nm_prdis("leftover %d bufs", howmany);
1158259412Sluigi			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1159259412Sluigi			    /* yes i am the last one */
1160344047Svmaffione			    nm_prdis("roll back nkr_hwlease to %d", j);
1161259412Sluigi			    kring->nkr_hwlease = j;
1162259412Sluigi			} else {
1163259412Sluigi			    while (howmany-- > 0) {
1164259412Sluigi				ring->slot[j].len = 0;
1165259412Sluigi				ring->slot[j].flags = 0;
1166259412Sluigi				j = nm_next(j, lim);
1167259412Sluigi			    }
1168259412Sluigi			}
1169259412Sluigi		    }
1170259412Sluigi		    p[lease_idx] = j; /* report I am done */
1171259412Sluigi
1172260368Sluigi		    update_pos = kring->nr_hwtail;
1173259412Sluigi
1174259412Sluigi		    if (my_start == update_pos) {
1175259412Sluigi			/* all slots before my_start have been reported,
1176259412Sluigi			 * so scan subsequent leases to see if other ranges
1177259412Sluigi			 * have been completed, and do a selwakeup or txsync.
1178259412Sluigi		         */
1179259412Sluigi			while (lease_idx != kring->nkr_lease_idx &&
1180259412Sluigi				p[lease_idx] != NR_NOSLOT) {
1181259412Sluigi			    j = p[lease_idx];
1182259412Sluigi			    p[lease_idx] = NR_NOSLOT;
1183259412Sluigi			    lease_idx = nm_next(lease_idx, lim);
1184259412Sluigi			}
1185259412Sluigi			/* j is the new 'write' position. j != my_start
1186259412Sluigi			 * means there are new buffers to report
1187259412Sluigi			 */
1188259412Sluigi			if (likely(j != my_start)) {
1189260368Sluigi				kring->nr_hwtail = j;
				/* the notify callback is invoked with the
				 * lock released
				 */
1190259412Sluigi				still_locked = 0;
1191259412Sluigi				mtx_unlock(&kring->q_lock);
1192285349Sluigi				kring->nm_notify(kring, 0);
1193270063Sluigi				/* this is netmap_notify for VALE ports and
1194270063Sluigi				 * netmap_bwrap_notify for bwrap. The latter will
1195270063Sluigi				 * trigger a txsync on the underlying hwna
1196270063Sluigi				 */
1197270063Sluigi				if (dst_na->retry && retry--) {
1198270063Sluigi					/* XXX this is going to call nm_notify again.
1199270063Sluigi					 * Only useful for bwrap in virtual machines
1200270063Sluigi					 */
1201259412Sluigi					goto retry;
1202270063Sluigi				}
1203259412Sluigi			}
1204259412Sluigi		    }
1205259412Sluigi		    if (still_locked)
1206259412Sluigi			mtx_unlock(&kring->q_lock);
1207259412Sluigi		}
1208259412Sluigicleanup:
		/* mark this destination queue as empty again, whether or
		 * not its packets were delivered
		 */
1209259412Sluigi		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1210259412Sluigi		d->bq_len = 0;
1211259412Sluigi	}
	/* the broadcast queue is shared by all destinations, so it is
	 * emptied only after the last one has been served
	 */
1212259412Sluigi	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1213259412Sluigi	brddst->bq_len = 0;
1214259412Sluigi	return 0;
1215259412Sluigi}
1216259412Sluigi
1217270063Sluigi/* nm_txsync callback for VALE ports */
1218259412Sluigistatic int
1219342033Svmaffionenetmap_vale_vp_txsync(struct netmap_kring *kring, int flags)
1220259412Sluigi{
1221270063Sluigi	struct netmap_vp_adapter *na =
1222270063Sluigi		(struct netmap_vp_adapter *)kring->na;
1223260368Sluigi	u_int done;
1224260368Sluigi	u_int const lim = kring->nkr_num_slots - 1;
1225285349Sluigi	u_int const head = kring->rhead;
1226259412Sluigi
1227259412Sluigi	if (bridge_batch <= 0) { /* testing only */
1228285349Sluigi		done = head; // used all
1229259412Sluigi		goto done;
1230259412Sluigi	}
1231270063Sluigi	if (!na->na_bdg) {
1232285349Sluigi		done = head;
1233270063Sluigi		goto done;
1234270063Sluigi	}
1235259412Sluigi	if (bridge_batch > NM_BDG_BATCH)
1236259412Sluigi		bridge_batch = NM_BDG_BATCH;
1237259412Sluigi
1238342033Svmaffione	done = nm_vale_preflush(kring, head);
1239259412Sluigidone:
1240285349Sluigi	if (done != head)
1241342033Svmaffione		nm_prerr("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
1242260368Sluigi	/*
1243260368Sluigi	 * packets between 'done' and 'cur' are left unsent.
1244260368Sluigi	 */
1245260368Sluigi	kring->nr_hwcur = done;
1246260368Sluigi	kring->nr_hwtail = nm_prev(done, lim);
1247342033Svmaffione	if (netmap_debug & NM_DEBUG_TXSYNC)
1248342033Svmaffione		nm_prinf("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
1249259412Sluigi	return 0;
1250259412Sluigi}
1251259412Sluigi
1252259412Sluigi
1253270063Sluigi/* create a netmap_vp_adapter that describes a VALE port.
1254270063Sluigi * Only persistent VALE ports have a non-null ifp.
1255270063Sluigi */
1256270063Sluigistatic int
1257342033Svmaffionenetmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
1258341477Svmaffione		struct netmap_mem_d *nmd, struct netmap_vp_adapter **ret)
1259270063Sluigi{
1260341477Svmaffione	struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
1261259412Sluigi	struct netmap_vp_adapter *vpna;
1262259412Sluigi	struct netmap_adapter *na;
1263341477Svmaffione	int error = 0;
1264261909Sluigi	u_int npipes = 0;
1265341477Svmaffione	u_int extrabufs = 0;
1266259412Sluigi
1267341477Svmaffione	if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
1268341477Svmaffione		return EINVAL;
1269341477Svmaffione	}
1270341477Svmaffione
1271341477Svmaffione	vpna = nm_os_malloc(sizeof(*vpna));
1272259412Sluigi	if (vpna == NULL)
1273259412Sluigi		return ENOMEM;
1274259412Sluigi
1275259412Sluigi 	na = &vpna->up;
1276259412Sluigi
1277259412Sluigi	na->ifp = ifp;
1278342033Svmaffione	strlcpy(na->name, hdr->nr_name, sizeof(na->name));
1279259412Sluigi
1280259412Sluigi	/* bound checking */
1281341477Svmaffione	na->num_tx_rings = req->nr_tx_rings;
1282259412Sluigi	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1283341477Svmaffione	req->nr_tx_rings = na->num_tx_rings; /* write back */
1284341477Svmaffione	na->num_rx_rings = req->nr_rx_rings;
1285259412Sluigi	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1286341477Svmaffione	req->nr_rx_rings = na->num_rx_rings; /* write back */
1287341477Svmaffione	nm_bound_var(&req->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1288259412Sluigi			1, NM_BDG_MAXSLOTS, NULL);
1289341477Svmaffione	na->num_tx_desc = req->nr_tx_slots;
1290341477Svmaffione	nm_bound_var(&req->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1291259412Sluigi			1, NM_BDG_MAXSLOTS, NULL);
1292261909Sluigi	/* validate number of pipes. We want at least 1,
1293261909Sluigi	 * but probably can do with some more.
1294261909Sluigi	 * So let's use 2 as default (when 0 is supplied)
1295261909Sluigi	 */
1296261909Sluigi	nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
1297261909Sluigi	/* validate extra bufs */
1298342033Svmaffione	extrabufs = req->nr_extra_bufs;
1299341477Svmaffione	nm_bound_var(&extrabufs, 0, 0,
1300261909Sluigi			128*NM_BDG_MAXSLOTS, NULL);
1301341477Svmaffione	req->nr_extra_bufs = extrabufs; /* write back */
1302341477Svmaffione	na->num_rx_desc = req->nr_rx_slots;
1303341477Svmaffione	/* Set the mfs to a default value, as it is needed on the VALE
1304341477Svmaffione	 * mismatch datapath. XXX We should set it according to the MTU
1305341477Svmaffione	 * known to the kernel. */
1306341477Svmaffione	vpna->mfs = NM_BDG_MFS_DEFAULT;
1307285349Sluigi	vpna->last_smac = ~0llu;
1308261909Sluigi	/*if (vpna->mfs > netmap_buf_size)  TODO netmap_buf_size is zero??
1309261909Sluigi		vpna->mfs = netmap_buf_size; */
1310341477Svmaffione	if (netmap_verbose)
1311342033Svmaffione		nm_prinf("max frame size %u", vpna->mfs);
1312259412Sluigi
1313285349Sluigi	na->na_flags |= NAF_BDG_MAYSLEEP;
1314285698Sluigi	/* persistent VALE ports look like hw devices
1315285698Sluigi	 * with a native netmap adapter
1316285698Sluigi	 */
1317285698Sluigi	if (ifp)
1318285698Sluigi		na->na_flags |= NAF_NATIVE;
1319342033Svmaffione	na->nm_txsync = netmap_vale_vp_txsync;
1320342033Svmaffione	na->nm_rxsync = netmap_vp_rxsync; /* use the one provided by bdg */
1321342033Svmaffione	na->nm_register = netmap_vp_reg;  /* use the one provided by bdg */
1322342033Svmaffione	na->nm_krings_create = netmap_vale_vp_krings_create;
1323342033Svmaffione	na->nm_krings_delete = netmap_vale_vp_krings_delete;
1324342033Svmaffione	na->nm_dtor = netmap_vale_vp_dtor;
1325344047Svmaffione	nm_prdis("nr_mem_id %d", req->nr_mem_id);
1326341477Svmaffione	na->nm_mem = nmd ?
1327341477Svmaffione		netmap_mem_get(nmd):
1328341477Svmaffione		netmap_mem_private_new(
1329259412Sluigi			na->num_tx_rings, na->num_tx_desc,
1330261909Sluigi			na->num_rx_rings, na->num_rx_desc,
1331341477Svmaffione			req->nr_extra_bufs, npipes, &error);
1332261909Sluigi	if (na->nm_mem == NULL)
1333261909Sluigi		goto err;
1334342033Svmaffione	na->nm_bdg_attach = netmap_vale_vp_bdg_attach;
1335259412Sluigi	/* other nmd fields are set in the common routine */
1336259412Sluigi	error = netmap_attach_common(na);
1337261909Sluigi	if (error)
1338261909Sluigi		goto err;
1339270063Sluigi	*ret = vpna;
1340259412Sluigi	return 0;
1341261909Sluigi
1342261909Sluigierr:
1343261909Sluigi	if (na->nm_mem != NULL)
1344341477Svmaffione		netmap_mem_put(na->nm_mem);
1345341477Svmaffione	nm_os_free(vpna);
1346261909Sluigi	return error;
1347259412Sluigi}
1348259412Sluigi
1349341477Svmaffione/* nm_bdg_attach callback for VALE ports
1350341477Svmaffione * The na_vp port is this same netmap_adapter. There is no host port.
1351270063Sluigi */
1352259412Sluigistatic int
1353342033Svmaffionenetmap_vale_vp_bdg_attach(const char *name, struct netmap_adapter *na,
1354341477Svmaffione		struct nm_bridge *b)
1355259412Sluigi{
1356341477Svmaffione	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
1357259412Sluigi
1358342033Svmaffione	if ((b->bdg_flags & NM_BDG_NEED_BWRAP) || vpna->na_bdg) {
1359341477Svmaffione		return NM_NEED_BWRAP;
1360259412Sluigi	}
1361341477Svmaffione	na->na_vp = vpna;
1362342033Svmaffione	strlcpy(na->name, name, sizeof(na->name));
1363341477Svmaffione	na->na_hostvp = NULL;
1364259412Sluigi	return 0;
1365259412Sluigi}
1366259412Sluigi
/*
 * Create the krings of a VALE bwrap adapter: first the part shared with
 * plain VALE ports, then the common bwrap part.  If the second step
 * fails, the first one is undone.  Returns 0 or the error of the step
 * that failed.
 */
static int
netmap_vale_bwrap_krings_create(struct netmap_adapter *na)
{
	/* impersonate a netmap_vp_adapter */
	int rc = netmap_vale_vp_krings_create(na);

	if (rc == 0) {
		rc = netmap_bwrap_krings_create_common(na);
		if (rc != 0)
			netmap_vale_vp_krings_delete(na);
	}
	return rc;
}
1382259412Sluigi
/*
 * Delete the krings of a VALE bwrap adapter, undoing the two steps of
 * netmap_vale_bwrap_krings_create() in reverse order.
 */
static void
netmap_vale_bwrap_krings_delete(struct netmap_adapter *na)
{
	/* common bwrap part first ... */
	netmap_bwrap_krings_delete_common(na);
	/* ... then the part shared with plain VALE ports */
	netmap_vale_vp_krings_delete(na);
}
1389259412Sluigi
1390259412Sluigistatic int
1391341477Svmaffionenetmap_vale_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
1392259412Sluigi{
1393341477Svmaffione	struct netmap_bwrap_adapter *bna;
1394341477Svmaffione	struct netmap_adapter *na = NULL;
1395341477Svmaffione	struct netmap_adapter *hostna = NULL;
1396341477Svmaffione	int error;
1397259412Sluigi
1398341477Svmaffione	bna = nm_os_malloc(sizeof(*bna));
1399341477Svmaffione	if (bna == NULL) {
1400341477Svmaffione		return ENOMEM;
1401341477Svmaffione	}
1402341477Svmaffione	na = &bna->up.up;
1403342033Svmaffione	strlcpy(na->name, nr_name, sizeof(na->name));
1404341477Svmaffione	na->nm_register = netmap_bwrap_reg;
1405342033Svmaffione	na->nm_txsync = netmap_vale_vp_txsync;
1406341477Svmaffione	// na->nm_rxsync = netmap_bwrap_rxsync;
1407341477Svmaffione	na->nm_krings_create = netmap_vale_bwrap_krings_create;
1408341477Svmaffione	na->nm_krings_delete = netmap_vale_bwrap_krings_delete;
1409341477Svmaffione	na->nm_notify = netmap_bwrap_notify;
1410341477Svmaffione	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
1411341477Svmaffione	/* Set the mfs, needed on the VALE mismatch datapath. */
1412341477Svmaffione	bna->up.mfs = NM_BDG_MFS_DEFAULT;
1413259412Sluigi
1414341477Svmaffione	if (hwna->na_flags & NAF_HOST_RINGS) {
1415341477Svmaffione		hostna = &bna->host.up;
1416341477Svmaffione		hostna->nm_notify = netmap_bwrap_notify;
1417341477Svmaffione		bna->host.mfs = NM_BDG_MFS_DEFAULT;
1418341477Svmaffione	}
1419285349Sluigi
1420341477Svmaffione	error = netmap_bwrap_attach_common(na, hwna);
1421341477Svmaffione	if (error) {
1422341477Svmaffione		nm_os_free(bna);
1423341477Svmaffione	}
1424259412Sluigi	return error;
1425259412Sluigi}
1426259412Sluigi
1427341477Svmaffioneint
1428341477Svmaffionenetmap_get_vale_na(struct nmreq_header *hdr, struct netmap_adapter **na,
1429341477Svmaffione		struct netmap_mem_d *nmd, int create)
1430341477Svmaffione{
1431341477Svmaffione	return netmap_get_bdg_na(hdr, na, nmd, create, &vale_bdg_ops);
1432341477Svmaffione}
1433260368Sluigi
1434341477Svmaffione
1435341477Svmaffione/* creates a persistent VALE port */
1436341477Svmaffioneint
1437341477Svmaffionenm_vi_create(struct nmreq_header *hdr)
1438259412Sluigi{
1439341477Svmaffione	struct nmreq_vale_newif *req =
1440341477Svmaffione		(struct nmreq_vale_newif *)(uintptr_t)hdr->nr_body;
1441270063Sluigi	int error = 0;
1442341477Svmaffione	/* Build a nmreq_register out of the nmreq_vale_newif,
1443341477Svmaffione	 * so that we can call netmap_get_bdg_na(). */
1444341477Svmaffione	struct nmreq_register regreq;
1445341477Svmaffione	bzero(&regreq, sizeof(regreq));
1446341477Svmaffione	regreq.nr_tx_slots = req->nr_tx_slots;
1447341477Svmaffione	regreq.nr_rx_slots = req->nr_rx_slots;
1448341477Svmaffione	regreq.nr_tx_rings = req->nr_tx_rings;
1449341477Svmaffione	regreq.nr_rx_rings = req->nr_rx_rings;
1450341477Svmaffione	regreq.nr_mem_id = req->nr_mem_id;
1451341477Svmaffione	hdr->nr_reqtype = NETMAP_REQ_REGISTER;
1452341477Svmaffione	hdr->nr_body = (uintptr_t)&regreq;
1453341477Svmaffione	error = netmap_vi_create(hdr, 0 /* no autodelete */);
1454341477Svmaffione	hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF;
1455341477Svmaffione	hdr->nr_body = (uintptr_t)req;
1456341477Svmaffione	/* Write back to the original struct. */
1457341477Svmaffione	req->nr_tx_slots = regreq.nr_tx_slots;
1458341477Svmaffione	req->nr_rx_slots = regreq.nr_rx_slots;
1459341477Svmaffione	req->nr_tx_rings = regreq.nr_tx_rings;
1460341477Svmaffione	req->nr_rx_rings = regreq.nr_rx_rings;
1461341477Svmaffione	req->nr_mem_id = regreq.nr_mem_id;
1462270063Sluigi	return error;
1463270063Sluigi}
1464270063Sluigi
1465341477Svmaffione/* remove a persistent VALE port from the system */
1466270063Sluigiint
1467341477Svmaffionenm_vi_destroy(const char *name)
1468270063Sluigi{
1469341477Svmaffione	struct ifnet *ifp;
1470341477Svmaffione	struct netmap_vp_adapter *vpna;
1471341477Svmaffione	int error;
1472259412Sluigi
1473341477Svmaffione	ifp = ifunit_ref(name);
1474341477Svmaffione	if (!ifp)
1475341477Svmaffione		return ENXIO;
1476341477Svmaffione	NMG_LOCK();
1477341477Svmaffione	/* make sure this is actually a VALE port */
1478341477Svmaffione	if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
1479341477Svmaffione		error = EINVAL;
1480341477Svmaffione		goto err;
1481270063Sluigi	}
1482259412Sluigi
1483341477Svmaffione	vpna = (struct netmap_vp_adapter *)NA(ifp);
1484259412Sluigi
1485341477Svmaffione	/* we can only destroy ports that were created via NETMAP_BDG_NEWIF */
1486341477Svmaffione	if (vpna->autodelete) {
1487341477Svmaffione		error = EINVAL;
1488341477Svmaffione		goto err;
1489285349Sluigi	}
1490259412Sluigi
1491341477Svmaffione	/* also make sure that nobody is using the inferface */
1492341477Svmaffione	if (NETMAP_OWNED_BY_ANY(&vpna->up) ||
1493341477Svmaffione	    vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? */) {
1494341477Svmaffione		error = EBUSY;
1495341477Svmaffione		goto err;
1496261909Sluigi	}
1497259412Sluigi
1498341477Svmaffione	NMG_UNLOCK();
1499259412Sluigi
1500342033Svmaffione	if (netmap_verbose)
1501342033Svmaffione		nm_prinf("destroying a persistent vale interface %s", ifp->if_xname);
1502341477Svmaffione	/* Linux requires all the references are released
1503341477Svmaffione	 * before unregister
1504270063Sluigi	 */
1505341477Svmaffione	netmap_detach(ifp);
1506341477Svmaffione	if_rele(ifp);
1507341477Svmaffione	nm_os_vi_detach(ifp);
1508259412Sluigi	return 0;
1509270063Sluigi
1510341477Svmaffioneerr:
1511341477Svmaffione	NMG_UNLOCK();
1512341477Svmaffione	if_rele(ifp);
1513270063Sluigi	return error;
1514259412Sluigi}
1515259412Sluigi
1516341477Svmaffionestatic int
1517341477Svmaffionenm_update_info(struct nmreq_register *req, struct netmap_adapter *na)
1518285349Sluigi{
1519341477Svmaffione	req->nr_rx_rings = na->num_rx_rings;
1520341477Svmaffione	req->nr_tx_rings = na->num_tx_rings;
1521341477Svmaffione	req->nr_rx_slots = na->num_rx_desc;
1522341477Svmaffione	req->nr_tx_slots = na->num_tx_desc;
1523341477Svmaffione	return netmap_mem_get_info(na->nm_mem, &req->nr_memsize, NULL,
1524341477Svmaffione					&req->nr_mem_id);
1525285349Sluigi}
1526285349Sluigi
1527341477Svmaffione
1528341477Svmaffione/*
1529341477Svmaffione * Create a virtual interface registered to the system.
1530341477Svmaffione * The interface will be attached to a bridge later.
1531341477Svmaffione */
1532341477Svmaffioneint
1533341477Svmaffionenetmap_vi_create(struct nmreq_header *hdr, int autodelete)
1534259412Sluigi{
1535341477Svmaffione	struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
1536341477Svmaffione	struct ifnet *ifp;
1537341477Svmaffione	struct netmap_vp_adapter *vpna;
1538341477Svmaffione	struct netmap_mem_d *nmd = NULL;
1539341477Svmaffione	int error;
1540285349Sluigi
1541341477Svmaffione	if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
1542341477Svmaffione		return EINVAL;
1543341477Svmaffione	}
1544285349Sluigi
1545341477Svmaffione	/* don't include VALE prefix */
1546341477Svmaffione	if (!strncmp(hdr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
1547341477Svmaffione		return EINVAL;
1548341477Svmaffione	if (strlen(hdr->nr_name) >= IFNAMSIZ) {
1549341477Svmaffione		return EINVAL;
1550341477Svmaffione	}
1551341477Svmaffione	ifp = ifunit_ref(hdr->nr_name);
1552341477Svmaffione	if (ifp) { /* already exist, cannot create new one */
1553341477Svmaffione		error = EEXIST;
1554341477Svmaffione		NMG_LOCK();
1555341477Svmaffione		if (NM_NA_VALID(ifp)) {
1556341477Svmaffione			int update_err = nm_update_info(req, NA(ifp));
1557341477Svmaffione			if (update_err)
1558341477Svmaffione				error = update_err;
1559341477Svmaffione		}
1560341477Svmaffione		NMG_UNLOCK();
1561341477Svmaffione		if_rele(ifp);
1562341477Svmaffione		return error;
1563341477Svmaffione	}
1564341477Svmaffione	error = nm_os_vi_persist(hdr->nr_name, &ifp);
1565341477Svmaffione	if (error)
1566341477Svmaffione		return error;
1567285349Sluigi
1568341477Svmaffione	NMG_LOCK();
1569341477Svmaffione	if (req->nr_mem_id) {
1570341477Svmaffione		nmd = netmap_mem_find(req->nr_mem_id);
1571341477Svmaffione		if (nmd == NULL) {
1572341477Svmaffione			error = EINVAL;
1573341477Svmaffione			goto err_1;
1574341477Svmaffione		}
1575341477Svmaffione	}
1576341477Svmaffione	/* netmap_vp_create creates a struct netmap_vp_adapter */
1577342033Svmaffione	error = netmap_vale_vp_create(hdr, ifp, nmd, &vpna);
1578341477Svmaffione	if (error) {
1579342033Svmaffione		if (netmap_debug & NM_DEBUG_VALE)
1580342033Svmaffione			nm_prerr("error %d", error);
1581341477Svmaffione		goto err_1;
1582341477Svmaffione	}
1583341477Svmaffione	/* persist-specific routines */
1584341477Svmaffione	vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
1585341477Svmaffione	if (!autodelete) {
1586341477Svmaffione		netmap_adapter_get(&vpna->up);
1587341477Svmaffione	} else {
1588341477Svmaffione		vpna->autodelete = 1;
1589341477Svmaffione	}
1590341477Svmaffione	NM_ATTACH_NA(ifp, &vpna->up);
1591341477Svmaffione	/* return the updated info */
1592341477Svmaffione	error = nm_update_info(req, &vpna->up);
1593341477Svmaffione	if (error) {
1594341477Svmaffione		goto err_2;
1595341477Svmaffione	}
1596344047Svmaffione	nm_prdis("returning nr_mem_id %d", req->nr_mem_id);
1597341477Svmaffione	if (nmd)
1598341477Svmaffione		netmap_mem_put(nmd);
1599341477Svmaffione	NMG_UNLOCK();
1600344047Svmaffione	nm_prdis("created %s", ifp->if_xname);
1601285349Sluigi	return 0;
1602341477Svmaffione
1603341477Svmaffioneerr_2:
1604341477Svmaffione	netmap_detach(ifp);
1605341477Svmaffioneerr_1:
1606341477Svmaffione	if (nmd)
1607341477Svmaffione		netmap_mem_put(nmd);
1608341477Svmaffione	NMG_UNLOCK();
1609341477Svmaffione	nm_os_vi_detach(ifp);
1610341477Svmaffione
1611341477Svmaffione	return error;
1612285349Sluigi}
1613285349Sluigi
1614259412Sluigi#endif /* WITH_VALE */
1615