/*
 * Copyright (C) 2013-2016 Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * This module implements the VALE switch for netmap

--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring or deleting a port, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch)

 */
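
/*
 * A minimal sketch of the forwarding-side locking pattern described
 * above (illustrative pseudo-code only; the real forwarding path is
 * nm_bdg_flush() in the VALE code, and reserve_slots()/copy_packets()/
 * publish_slots() are hypothetical helpers, not netmap functions):
 *
 *	BDG_RLOCK(b);				// shared, sleepable
 *	mtx_lock(&dst_kring->q_lock);
 *	first = reserve_slots(dst_kring, n);	// reserve n slots
 *	mtx_unlock(&dst_kring->q_lock);
 *	copy_packets(src, dst_kring, first, n);	// may page-fault
 *	mtx_lock(&dst_kring->q_lock);
 *	publish_slots(dst_kring, first, n);	// update the rx ring
 *	mtx_unlock(&dst_kring->q_lock);
 *	BDG_RUNLOCK(b);
 */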

/*
 * OS-specific code that is used only within this file.
 * Other OS-specific code that must be accessed by drivers
 * is present in netmap_kern.h
 */

#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD: stable/11/sys/dev/netmap/netmap_bdg.c 344047 2019-02-12 09:26:05Z vmaffione $");

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>	/* defines used in kernel.h */
#include <sys/kernel.h>	/* types used in module initialization */
#include <sys/conf.h>	/* cdevsw struct, UID, GID */
#include <sys/sockio.h>
#include <sys/socketvar.h>	/* struct socket */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/rwlock.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/bpf.h>		/* BIOCIMMEDIATE */
#include <machine/bus.h>	/* bus_dmamap_* */
#include <sys/endian.h>
#include <sys/refcount.h>
#include <sys/smp.h>


#elif defined(linux)

#include "bsd_glue.h"

#elif defined(__APPLE__)

#warning OSX support is only partial
#include "osx_glue.h"

#elif defined(_WIN32)
#include "win_glue.h"

#else

#error	Unsupported platform

#endif /* unsupported */

/*
 * common headers
 */

#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>

#include <dev/netmap/netmap_bdg.h>

const char*
netmap_bdg_name(struct netmap_vp_adapter *vp)
{
	struct nm_bridge *b = vp->na_bdg;
	if (b == NULL)
		return NULL;
	return b->bdg_basename;
}


#ifndef CONFIG_NET_NS
/*
 * XXX in principle nm_bridges could be created dynamically
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 */
struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */


static int
nm_is_id_char(const char c)
{
	return (c >= 'a' && c <= 'z') ||
	       (c >= 'A' && c <= 'Z') ||
	       (c >= '0' && c <= '9') ||
	       (c == '_');
}

/* Validate the name of a bdg port and return the
 * position of the ":" character. */
static int
nm_bdg_name_validate(const char *name, size_t prefixlen)
{
	int colon_pos = -1;
	int i;

	if (!name || strlen(name) < prefixlen) {
		return -1;
	}

	for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
		if (name[i] == ':') {
			colon_pos = i;
			break;
		} else if (!nm_is_id_char(name[i])) {
			return -1;
		}
	}

	if (strlen(name) - colon_pos > IFNAMSIZ) {
		/* interface name too long */
		return -1;
	}

	return colon_pos;
}
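
/*
 * Worked example (illustrative only): with prefix "vale" (prefixlen 4),
 * the port name "vale0:eth2" is accepted and nm_bdg_name_validate()
 * returns 5, the index of the ':' that separates the bridge name
 * ("vale0") from the attached interface name ("eth2").
 */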

/*
 * locate a bridge among the existing ones.
 * MUST BE CALLED WITH NMG_LOCK()
 *
 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
 * We assume that this is called with a name of at least NM_NAME chars.
 */
struct nm_bridge *
nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
{
	int i, namelen;
	struct nm_bridge *b = NULL, *bridges;
	u_int num_bridges;

	NMG_LOCK_ASSERT();

	netmap_bns_getbridges(&bridges, &num_bridges);

	namelen = nm_bdg_name_validate(name,
			(ops != NULL ? strlen(ops->name) : 0));
	if (namelen < 0) {
		nm_prerr("invalid bridge name %s", name ? name : "(null)");
		return NULL;
	}

	/* lookup the name, remember empty slot if there is one */
	for (i = 0; i < num_bridges; i++) {
		struct nm_bridge *x = bridges + i;

		if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
			if (create && b == NULL)
				b = x;	/* record empty slot */
		} else if (x->bdg_namelen != namelen) {
			continue;
		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
			nm_prdis("found '%.*s' at %d", namelen, name, i);
			b = x;
			break;
		}
	}
	if (i == num_bridges && b) { /* name not found, can create entry */
		/* initialize the bridge */
		nm_prdis("create new bridge %s with ports %d", b->bdg_basename,
			b->bdg_active_ports);
		b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
		if (b->ht == NULL) {
			nm_prerr("failed to allocate hash table");
			return NULL;
		}
		strncpy(b->bdg_basename, name, namelen);
		b->bdg_namelen = namelen;
		b->bdg_active_ports = 0;
		for (i = 0; i < NM_BDG_MAXPORTS; i++)
			b->bdg_port_index[i] = i;
		/* set the default function */
		b->bdg_ops = b->bdg_saved_ops = *ops;
		b->private_data = b->ht;
		b->bdg_flags = 0;
		NM_BNS_GET(b);
	}
	return b;
}


int
netmap_bdg_free(struct nm_bridge *b)
{
	if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
		return EBUSY;
	}

	nm_prdis("marking bridge %s as free", b->bdg_basename);
	nm_os_free(b->ht);
	memset(&b->bdg_ops, 0, sizeof(b->bdg_ops));
	memset(&b->bdg_saved_ops, 0, sizeof(b->bdg_saved_ops));
	b->bdg_flags = 0;
	NM_BNS_PUT(b);
	return 0;
}

/* Called by external kernel modules (e.g., Openvswitch)
 * to modify the private data previously given to regops().
 * 'name' may be just the bridge's name (including ':' if it
 * is not just NM_BDG_NAME).
 * Called without NMG_LOCK.
 */
int
netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
	void *callback_data, void *auth_token)
{
	void *private_data = NULL;
	struct nm_bridge *b;
	int error = 0;

	NMG_LOCK();
	b = nm_find_bridge(name, 0 /* don't create */, NULL);
	if (!b) {
		error = EINVAL;
		goto unlock_update_priv;
	}
	if (!nm_bdg_valid_auth_token(b, auth_token)) {
		error = EACCES;
		goto unlock_update_priv;
	}
	BDG_WLOCK(b);
	private_data = callback(b->private_data, callback_data, &error);
	b->private_data = private_data;
	BDG_WUNLOCK(b);

unlock_update_priv:
	NMG_UNLOCK();
	return error;
}
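
/*
 * A minimal usage sketch for the function above (illustrative only;
 * the callback, the bridge name and the private data are hypothetical,
 * not part of netmap):
 *
 *	static void *
 *	my_update_cb(void *cur_private, void *cb_data, int *error)
 *	{
 *		// return the new private data for the bridge,
 *		// or set *error and return cur_private on failure
 *		return cb_data;
 *	}
 *
 *	error = netmap_bdg_update_private_data("vale-ovs:", my_update_cb,
 *			new_private, auth_token);
 */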



/* remove from bridge b the ports in slots hw and sw
 * (sw can be -1 if not needed)
 */
void
netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
{
	int s_hw = hw, s_sw = sw;
	int i, lim = b->bdg_active_ports;
	uint32_t *tmp = b->tmp_bdg_port_index;

	/*
	New algorithm:
	make a copy of bdg_port_index;
	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
	in the array of bdg_port_index, replacing them with
	entries from the bottom of the array;
	decrement bdg_active_ports;
	acquire BDG_WLOCK() and copy back the array.
	 */

	if (netmap_debug & NM_DEBUG_BDG)
		nm_prinf("detach %d and %d (lim %d)", hw, sw, lim);
	/* make a copy of the list of active ports, update it,
	 * and then copy back within BDG_WLOCK().
	 */
	memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
		if (hw >= 0 && tmp[i] == hw) {
			nm_prdis("detach hw %d at %d", hw, i);
			lim--; /* point to last active port */
			tmp[i] = tmp[lim]; /* swap with i */
			tmp[lim] = hw;	/* now this is inactive */
			hw = -1;
		} else if (sw >= 0 && tmp[i] == sw) {
			nm_prdis("detach sw %d at %d", sw, i);
			lim--;
			tmp[i] = tmp[lim];
			tmp[lim] = sw;
			sw = -1;
		} else {
			i++;
		}
	}
	if (hw >= 0 || sw >= 0) {
		nm_prerr("delete failed hw %d sw %d, should panic...", hw, sw);
	}

	BDG_WLOCK(b);
	if (b->bdg_ops.dtor)
		b->bdg_ops.dtor(b->bdg_ports[s_hw]);
	b->bdg_ports[s_hw] = NULL;
	if (s_sw >= 0) {
		b->bdg_ports[s_sw] = NULL;
	}
	memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
	b->bdg_active_ports = lim;
	BDG_WUNLOCK(b);

	nm_prdis("now %d active ports", lim);
	netmap_bdg_free(b);
}


/* nm_bdg_ctl callback for VALE ports */
int
netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
{
	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
	struct nm_bridge *b = vpna->na_bdg;

	if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
		return 0; /* nothing to do */
	}
	if (b) {
		netmap_set_all_rings(na, 0 /* disable */);
		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
		vpna->na_bdg = NULL;
		netmap_set_all_rings(na, 1 /* enable */);
	}
	/* we took the reference just for the attach */
	netmap_adapter_put(na);
	return 0;
}

int
netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
		struct nm_bridge *b)
{
	return NM_NEED_BWRAP;
}

/* Try to get a reference to a netmap adapter attached to a VALE switch.
 * If the adapter is found (or is created), this function returns 0, a
 * non NULL pointer is returned into *na, and the caller holds a
 * reference to the adapter.
 * If an adapter is not found, then no reference is grabbed and the
 * function returns an error code, or 0 if there is just a VALE prefix
 * mismatch. Therefore the caller holds a reference when
 * (*na != NULL && return == 0).
 */
int
netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
	struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops)
{
	char *nr_name = hdr->nr_name;
	const char *ifname;
	struct ifnet *ifp = NULL;
	int error = 0;
	struct netmap_vp_adapter *vpna, *hostna = NULL;
	struct nm_bridge *b;
	uint32_t i, j;
	uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
	int needed;

	*na = NULL;     /* default return value */

	/* first try to see if this is a bridge port. */
	NMG_LOCK_ASSERT();
	if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) {
		return 0;  /* no error, but no VALE prefix */
	}

	b = nm_find_bridge(nr_name, create, ops);
	if (b == NULL) {
		nm_prdis("no bridges available for '%s'", nr_name);
		return (create ? ENOMEM : ENXIO);
	}
	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
		panic("x");

	/* Now we are sure that name starts with the bridge's name,
	 * lookup the port in the bridge. We need to scan the entire
	 * list. It is not important to hold a WLOCK on the bridge
	 * during the search because NMG_LOCK already guarantees
	 * that there are no other possible writers.
	 */

	/* lookup in the local list of ports */
	for (j = 0; j < b->bdg_active_ports; j++) {
		i = b->bdg_port_index[j];
		vpna = b->bdg_ports[i];
		nm_prdis("checking %s", vpna->up.name);
		if (!strcmp(vpna->up.name, nr_name)) {
			netmap_adapter_get(&vpna->up);
			nm_prdis("found existing if %s refs %d", nr_name,
				vpna->up.na_refcount);
			*na = &vpna->up;
			return 0;
		}
	}
	/* not found, should we create it? */
	if (!create)
		return ENXIO;
	/* yes we should, see if we have space to attach entries */
	needed = 2; /* in some cases we only need 1 */
	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
		nm_prerr("bridge full %d, cannot create new port", b->bdg_active_ports);
		return ENOMEM;
	}
	/* record the next two ports available, but do not allocate yet */
	cand = b->bdg_port_index[b->bdg_active_ports];
	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
	/* the port name is whatever follows the bridge's name */
	ifname = nr_name + b->bdg_namelen + 1;
	nm_prdis("+++ bridge %s port %s used %d avail %d %d",
		b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);

	/*
	 * try to see if there is a matching NIC with this name
	 */
	ifp = ifunit_ref(ifname);
	if (!ifp) {
		/* Create an ephemeral virtual port.
		 * This block contains all the ephemeral-specific logic.
		 */

		if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
			error = EINVAL;
			goto out;
		}

		/* bdg_netmap_attach creates a struct netmap_adapter */
		error = b->bdg_ops.vp_create(hdr, NULL, nmd, &vpna);
		if (error) {
			if (netmap_debug & NM_DEBUG_BDG)
				nm_prerr("error %d", error);
			goto out;
		}
		/* shortcut - we can skip get_hw_na(),
		 * ownership check and nm_bdg_attach()
		 */

	} else {
		struct netmap_adapter *hw;

		/* the vale:nic syntax is only valid for some commands */
		switch (hdr->nr_reqtype) {
		case NETMAP_REQ_VALE_ATTACH:
		case NETMAP_REQ_VALE_DETACH:
		case NETMAP_REQ_VALE_POLLING_ENABLE:
		case NETMAP_REQ_VALE_POLLING_DISABLE:
			break; /* ok */
		default:
			error = EINVAL;
			goto out;
		}

		error = netmap_get_hw_na(ifp, nmd, &hw);
		if (error || hw == NULL)
			goto out;

		/* host adapter might not be created */
		error = hw->nm_bdg_attach(nr_name, hw, b);
		if (error == NM_NEED_BWRAP) {
			error = b->bdg_ops.bwrap_attach(nr_name, hw);
		}
		if (error)
			goto out;
		vpna = hw->na_vp;
		hostna = hw->na_hostvp;
		if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
			/* Check if we need to skip the host rings. */
			struct nmreq_vale_attach *areq =
				(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
			if (areq->reg.nr_mode != NR_REG_NIC_SW) {
				hostna = NULL;
			}
		}
	}

	BDG_WLOCK(b);
	vpna->bdg_port = cand;
	nm_prdis("NIC  %p to bridge port %d", vpna, cand);
	/* bind the port to the bridge (virtual ports are not active) */
	b->bdg_ports[cand] = vpna;
	vpna->na_bdg = b;
	b->bdg_active_ports++;
	if (hostna != NULL) {
		/* also bind the host stack to the bridge */
		b->bdg_ports[cand2] = hostna;
		hostna->bdg_port = cand2;
		hostna->na_bdg = b;
		b->bdg_active_ports++;
		nm_prdis("host %p to bridge port %d", hostna, cand2);
	}
	nm_prdis("if %s refs %d", ifname, vpna->up.na_refcount);
	BDG_WUNLOCK(b);
	*na = &vpna->up;
	netmap_adapter_get(*na);

out:
	if (ifp)
		if_rele(ifp);

	return error;
}
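
/*
 * A minimal caller sketch for the reference rules of netmap_get_bdg_na()
 * (illustrative only; 'hdr', 'nmd' and 'ops' come from the specific
 * switch implementation that calls this function):
 *
 *	struct netmap_adapter *na = NULL;
 *	int error;
 *
 *	NMG_LOCK();
 *	error = netmap_get_bdg_na(hdr, &na, nmd, 1, ops);	// create=1
 *	if (error == 0 && na != NULL) {
 *		// we hold a reference on na; drop it when done
 *		netmap_adapter_put(na);
 *	} else if (error == 0) {
 *		// name did not match the VALE prefix: try other adapter types
 *	}
 *	NMG_UNLOCK();
 */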


int
nm_is_bwrap(struct netmap_adapter *na)
{
	return na->nm_register == netmap_bwrap_reg;
}


struct nm_bdg_polling_state;

struct nm_bdg_kthread {
	struct nm_kctx *nmk;
	u_int qfirst;
	u_int qlast;
	struct nm_bdg_polling_state *bps;
};

struct nm_bdg_polling_state {
	bool configured;
	bool stopped;
	struct netmap_bwrap_adapter *bna;
	uint32_t mode;
	u_int qfirst;
	u_int qlast;
	u_int cpu_from;
	u_int ncpus;
	struct nm_bdg_kthread *kthreads;
};

static void
netmap_bwrap_polling(void *data)
{
	struct nm_bdg_kthread *nbk = data;
	struct netmap_bwrap_adapter *bna;
	u_int qfirst, qlast, i;
	struct netmap_kring **kring0, *kring;

	if (!nbk)
		return;
	qfirst = nbk->qfirst;
	qlast = nbk->qlast;
	bna = nbk->bps->bna;
	kring0 = NMR(bna->hwna, NR_RX);

	for (i = qfirst; i < qlast; i++) {
		kring = kring0[i];
		kring->nm_notify(kring, 0);
	}
}

static int
nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
{
	struct nm_kctx_cfg kcfg;
	int i, j;

	bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
	if (bps->kthreads == NULL)
		return ENOMEM;

	bzero(&kcfg, sizeof(kcfg));
	kcfg.worker_fn = netmap_bwrap_polling;
	for (i = 0; i < bps->ncpus; i++) {
		struct nm_bdg_kthread *t = bps->kthreads + i;
		int all = (bps->ncpus == 1 &&
			bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
		int affinity = bps->cpu_from + i;

		t->bps = bps;
		t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
		t->qlast = all ? bps->qlast : t->qfirst + 1;
		if (netmap_verbose)
			nm_prinf("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
				t->qlast);

		kcfg.type = i;
		kcfg.worker_private = t;
		t->nmk = nm_os_kctx_create(&kcfg, NULL);
		if (t->nmk == NULL) {
			goto cleanup;
		}
		nm_os_kctx_worker_setaff(t->nmk, affinity);
	}
	return 0;

cleanup:
	for (j = 0; j < i; j++) {
		struct nm_bdg_kthread *t = bps->kthreads + j;
		nm_os_kctx_destroy(t->nmk);
	}
	nm_os_free(bps->kthreads);
	return EFAULT;
}

/* A variant of ptnetmap_start_kthreads() */
static int
nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
{
	int error, i, j;

	if (!bps) {
		nm_prerr("polling is not configured");
		return EFAULT;
	}
	bps->stopped = false;

	for (i = 0; i < bps->ncpus; i++) {
		struct nm_bdg_kthread *t = bps->kthreads + i;
		error = nm_os_kctx_worker_start(t->nmk);
		if (error) {
			nm_prerr("error in nm_kthread_start(): %d", error);
			goto cleanup;
		}
	}
	return 0;

cleanup:
	for (j = 0; j < i; j++) {
		struct nm_bdg_kthread *t = bps->kthreads + j;
		nm_os_kctx_worker_stop(t->nmk);
	}
	bps->stopped = true;
	return error;
}

static void
nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
{
	int i;

	if (!bps)
		return;

	for (i = 0; i < bps->ncpus; i++) {
		struct nm_bdg_kthread *t = bps->kthreads + i;
		nm_os_kctx_worker_stop(t->nmk);
		nm_os_kctx_destroy(t->nmk);
	}
	bps->stopped = true;
}

static int
get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
		struct nm_bdg_polling_state *bps)
{
	unsigned int avail_cpus, core_from;
	unsigned int qfirst, qlast;
	uint32_t i = req->nr_first_cpu_id;
	uint32_t req_cpus = req->nr_num_polling_cpus;

	avail_cpus = nm_os_ncpus();

	if (req_cpus == 0) {
		nm_prerr("req_cpus must be > 0");
		return EINVAL;
	} else if (req_cpus >= avail_cpus) {
		nm_prerr("Cannot use all the CPUs in the system");
		return EINVAL;
	}

	if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) {
		/* Use a separate core for each ring. If nr_num_polling_cpus>1,
		 * consecutive rings are polled by consecutive cores.
		 * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
		 * ring 2 and 3 are polled by core 2 and 3, respectively. */
		if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
			nm_prerr("Rings %u-%u not in range (have %d rings)",
				i, i + req_cpus, nma_get_nrings(na, NR_RX));
			return EINVAL;
		}
		qfirst = i;
		qlast = qfirst + req_cpus;
		core_from = qfirst;

	} else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) {
		/* Poll all the rings using a core specified by nr_first_cpu_id.
		 * The number of cores must be 1. */
		if (req_cpus != 1) {
			nm_prerr("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
				"(was %d)", req_cpus);
			return EINVAL;
		}
		qfirst = 0;
		qlast = nma_get_nrings(na, NR_RX);
		core_from = i;
	} else {
		nm_prerr("Invalid polling mode");
		return EINVAL;
	}

	bps->mode = req->nr_mode;
	bps->qfirst = qfirst;
	bps->qlast = qlast;
	bps->cpu_from = core_from;
	bps->ncpus = req_cpus;
	nm_prinf("%s qfirst %u qlast %u cpu_from %u ncpus %u",
		req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
		"MULTI" : "SINGLE",
		qfirst, qlast, core_from, req_cpus);
	return 0;
}

static int
nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na)
{
	struct nm_bdg_polling_state *bps;
	struct netmap_bwrap_adapter *bna;
	int error;

	bna = (struct netmap_bwrap_adapter *)na;
	if (bna->na_polling_state) {
		nm_prerr("ERROR adapter already in polling mode");
		return EFAULT;
	}

	bps = nm_os_malloc(sizeof(*bps));
	if (!bps)
		return ENOMEM;
	bps->configured = false;
	bps->stopped = true;

	if (get_polling_cfg(req, na, bps)) {
		nm_os_free(bps);
		return EINVAL;
	}

	if (nm_bdg_create_kthreads(bps)) {
		nm_os_free(bps);
		return EFAULT;
	}

	bps->configured = true;
	bna->na_polling_state = bps;
	bps->bna = bna;

	/* disable interrupts if possible */
	nma_intr_enable(bna->hwna, 0);
	/* start kthread now */
	error = nm_bdg_polling_start_kthreads(bps);
	if (error) {
		nm_prerr("ERROR nm_bdg_polling_start_kthread()");
		nm_os_free(bps->kthreads);
		nm_os_free(bps);
		bna->na_polling_state = NULL;
		nma_intr_enable(bna->hwna, 1);
	}
	return error;
}

static int
nm_bdg_ctl_polling_stop(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
	struct nm_bdg_polling_state *bps;

	if (!bna->na_polling_state) {
		nm_prerr("ERROR adapter is not in polling mode");
		return EFAULT;
	}
	bps = bna->na_polling_state;
	nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
	bps->configured = false;
	nm_os_free(bps);
	bna->na_polling_state = NULL;
	/* reenable interrupts */
	nma_intr_enable(bna->hwna, 1);
	return 0;
}

int
nm_bdg_polling(struct nmreq_header *hdr)
{
	struct nmreq_vale_polling *req =
		(struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body;
	struct netmap_adapter *na = NULL;
	int error = 0;

	NMG_LOCK();
	error = netmap_get_vale_na(hdr, &na, NULL, /*create=*/0);
	if (na && !error) {
		if (!nm_is_bwrap(na)) {
			error = EOPNOTSUPP;
		} else if (hdr->nr_reqtype == NETMAP_REQ_VALE_POLLING_ENABLE) {
			error = nm_bdg_ctl_polling_start(req, na);
			if (!error)
				netmap_adapter_get(na);
		} else {
			error = nm_bdg_ctl_polling_stop(na);
			if (!error)
				netmap_adapter_put(na);
		}
		netmap_adapter_put(na);
	} else if (!na && !error) {
		/* Not a VALE port. */
		error = EINVAL;
	}
	NMG_UNLOCK();

	return error;
}
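
/*
 * A hedged userspace sketch of how polling mode might be requested
 * through the nmreq_header control path (illustrative only; it assumes
 * the standard /dev/netmap NIOCCTRL interface and a NIC already
 * attached to a VALE switch under the hypothetical name "vale0:em1"):
 *
 *	struct nmreq_header hdr;
 *	struct nmreq_vale_polling req;
 *
 *	memset(&hdr, 0, sizeof(hdr));
 *	memset(&req, 0, sizeof(req));
 *	hdr.nr_version = NETMAP_API;
 *	hdr.nr_reqtype = NETMAP_REQ_VALE_POLLING_ENABLE;
 *	strncpy(hdr.nr_name, "vale0:em1", sizeof(hdr.nr_name) - 1);
 *	hdr.nr_body = (uintptr_t)&req;
 *	req.nr_mode = NETMAP_POLLING_MODE_SINGLE_CPU;
 *	req.nr_first_cpu_id = 1;
 *	req.nr_num_polling_cpus = 1;
 *	ioctl(fd, NIOCCTRL, &hdr);	// fd is an open /dev/netmap descriptor
 */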

/* Called by external kernel modules (e.g., Openvswitch)
 * to set the configure/lookup/dtor functions of a VALE instance.
 * Register callbacks to the given bridge. 'name' may be just the
 * bridge's name (including ':' if it is not just NM_BDG_NAME).
 *
 * Called without NMG_LOCK.
 */

int
netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token)
{
	struct nm_bridge *b;
	int error = 0;

	NMG_LOCK();
	b = nm_find_bridge(name, 0 /* don't create */, NULL);
	if (!b) {
		error = ENXIO;
		goto unlock_regops;
	}
	if (!nm_bdg_valid_auth_token(b, auth_token)) {
		error = EACCES;
		goto unlock_regops;
	}

	BDG_WLOCK(b);
	if (!bdg_ops) {
		/* resetting the bridge */
		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
		b->bdg_ops = b->bdg_saved_ops;
		b->private_data = b->ht;
	} else {
		/* modifying the bridge */
		b->private_data = private_data;
#define nm_bdg_override(m) if (bdg_ops->m) b->bdg_ops.m = bdg_ops->m
		nm_bdg_override(lookup);
		nm_bdg_override(config);
		nm_bdg_override(dtor);
		nm_bdg_override(vp_create);
		nm_bdg_override(bwrap_attach);
#undef nm_bdg_override

	}
	BDG_WUNLOCK(b);

unlock_regops:
	NMG_UNLOCK();
	return error;
}
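
/*
 * A minimal usage sketch for netmap_bdg_regops() (illustrative only;
 * 'my_lookup' is a hypothetical forwarding function supplied by the
 * external module, and only the callbacks that are non-NULL in
 * 'my_ops' override the bridge defaults):
 *
 *	static struct netmap_bdg_ops my_ops = {
 *		.lookup = my_lookup,	// pick the destination port
 *	};
 *
 *	error = netmap_bdg_regops("vale-ovs:", &my_ops, my_private, token);
 *	...
 *	// restore the default behaviour when done
 *	error = netmap_bdg_regops("vale-ovs:", NULL, NULL, token);
 */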


int
netmap_bdg_config(struct nm_ifreq *nr)
{
	struct nm_bridge *b;
	int error = EINVAL;

	NMG_LOCK();
	b = nm_find_bridge(nr->nifr_name, 0, NULL);
	if (!b) {
		NMG_UNLOCK();
		return error;
	}
	NMG_UNLOCK();
	/* Don't call config() with NMG_LOCK() held */
	BDG_RLOCK(b);
	if (b->bdg_ops.config != NULL)
		error = b->bdg_ops.config(nr);
	BDG_RUNLOCK(b);
	return error;
}


/* nm_register callback for VALE ports */
int
netmap_vp_reg(struct netmap_adapter *na, int onoff)
{
	struct netmap_vp_adapter *vpna =
		(struct netmap_vp_adapter*)na;

	/* persistent ports may be put in netmap mode
	 * before being attached to a bridge
	 */
	if (vpna->na_bdg)
		BDG_WLOCK(vpna->na_bdg);
	if (onoff) {
		netmap_krings_mode_commit(na, onoff);
		if (na->active_fds == 0)
			na->na_flags |= NAF_NETMAP_ON;
		/* XXX on FreeBSD, persistent VALE ports should also
		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
		 */
	} else {
		if (na->active_fds == 0)
			na->na_flags &= ~NAF_NETMAP_ON;
		netmap_krings_mode_commit(na, onoff);
	}
	if (vpna->na_bdg)
		BDG_WUNLOCK(vpna->na_bdg);
	return 0;
}


/* rxsync code used by the VALE ports' nm_rxsync callback and also
 * internally by the bwrap
 */
static int
netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, lim = kring->nkr_num_slots - 1;
	u_int head = kring->rhead;
	int n;

	if (head > lim) {
		nm_prerr("ouch dangerous reset!!!");
		n = netmap_ring_reinit(kring);
		goto done;
	}

	/* First part, import newly received packets. */
	/* actually nothing to do here, they are already in the kring */

	/* Second part, skip past packets that userspace has released. */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		/* consistency check, but nothing really important here */
		for (n = 0; likely(nm_i != head); n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			void *addr = NMB(na, slot);

			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
				nm_prerr("bad buffer index %d, ignore ?",
					slot->buf_idx);
			}
			slot->flags &= ~NS_BUF_CHANGED;
			nm_i = nm_next(nm_i, lim);
		}
		kring->nr_hwcur = head;
	}

	n = 0;
done:
	return n;
}

/*
 * nm_rxsync callback for VALE ports
 * user process reading from a VALE switch.
 * Already protected against concurrent calls from userspace,
 * but we must acquire the queue's lock to protect against
 * writers on the same queue.
 */
int
netmap_vp_rxsync(struct netmap_kring *kring, int flags)
{
	int n;

	mtx_lock(&kring->q_lock);
	n = netmap_vp_rxsync_locked(kring, flags);
	mtx_unlock(&kring->q_lock);
	return n;
}

int
netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna,
		struct netmap_bdg_ops *ops)
{
	return ops->bwrap_attach(nr_name, hwna);
}


/* Bridge wrapper code (bwrap).
 * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
 * VALE switch.
 * The main task is to swap the meaning of tx and rx rings to match the
 * expectations of the VALE switch code (see nm_bdg_flush).
 *
 * The bwrap works by interposing a netmap_bwrap_adapter between the
 * rest of the system and the hwna. The netmap_bwrap_adapter looks like
 * a netmap_vp_adapter to the rest of the system, but, internally, it
 * translates all callbacks to what the hwna expects.
 *
 * Note that we have to intercept callbacks coming from two sides:
 *
 *  - callbacks coming from the netmap module are intercepted by
 *    passing around the netmap_bwrap_adapter instead of the hwna
 *
 *  - callbacks coming from outside of the netmap module only know
 *    about the hwna. This, however, only happens in interrupt
 *    handlers, where only the hwna->nm_notify callback is called.
 *    What the bwrap does is to overwrite the hwna->nm_notify callback
 *    with its own netmap_bwrap_intr_notify.
 *    XXX This assumes that the hwna->nm_notify callback was the
 *    standard netmap_notify(), as is the case for NIC adapters.
 *    Any additional action performed by hwna->nm_notify will not be
 *    performed by netmap_bwrap_intr_notify.
 *
 * Additionally, the bwrap can optionally attach the host rings pair
 * of the wrapped adapter to a different port of the switch.
 */
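
/*
 * Sketch of the nm_notify interception described above (illustrative
 * only; the actual save/restore is performed in netmap_bwrap_reg()
 * below, one rx ring at a time):
 *
 *	// on register
 *	kring->save_notify = kring->nm_notify;
 *	kring->nm_notify = netmap_bwrap_intr_notify;
 *	...
 *	// on unregister
 *	kring->nm_notify = kring->save_notify;
 *	kring->save_notify = NULL;
 */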


static void
netmap_bwrap_dtor(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct nm_bridge *b = bna->up.na_bdg,
		*bh = bna->host.na_bdg;

	if (bna->host.up.nm_mem)
		netmap_mem_put(bna->host.up.nm_mem);

	if (b) {
		netmap_bdg_detach_common(b, bna->up.bdg_port,
			    (bh ? bna->host.bdg_port : -1));
	}

	nm_prdis("na %p", na);
	na->ifp = NULL;
	bna->host.up.ifp = NULL;
	hwna->na_vp = bna->saved_na_vp;
	hwna->na_hostvp = NULL;
	hwna->na_private = NULL;
	hwna->na_flags &= ~NAF_BUSY;
	netmap_adapter_put(hwna);

}


/*
 * Intr callback for NICs connected to a bridge.
 * Simply ignore tx interrupts (maybe we could try to recover space ?)
 * and pass received packets from nic to the bridge.
 *
 * XXX TODO check locking: this is called from the interrupt
 * handler so we should make sure that the interface is not
 * disconnected while passing down an interrupt.
 *
 * Note, no user process can access this NIC or the host stack.
 * The only part of the ring that is significant are the slots,
 * and head/cur/tail are set from the kring as needed
 * (part as a receive ring, part as a transmit ring).
 *
 * callback that overwrites the hwna notify callback.
 * Packets come from the outside or from the host stack and are put on an
 * hwna rx ring.
 * The bridge wrapper then sends the packets through the bridge.
 */
static int
netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_kring *bkring;
	struct netmap_vp_adapter *vpna = &bna->up;
	u_int ring_nr = kring->ring_id;
	int ret = NM_IRQ_COMPLETED;
	int error;

	if (netmap_debug & NM_DEBUG_RXINTR)
	    nm_prinf("%s %s 0x%x", na->name, kring->name, flags);

	bkring = vpna->up.tx_rings[ring_nr];

	/* make sure the ring is not disabled */
	if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
		return EIO;
	}

	if (netmap_debug & NM_DEBUG_RXINTR)
	    nm_prinf("%s head %d cur %d tail %d",  na->name,
		kring->rhead, kring->rcur, kring->rtail);

	/* simulate a user wakeup on the rx ring
	 * fetch packets that have arrived.
	 */
	error = kring->nm_sync(kring, 0);
	if (error)
		goto put_out;
	if (kring->nr_hwcur == kring->nr_hwtail) {
		if (netmap_verbose)
			nm_prlim(1, "interrupt with no packets on %s",
				kring->name);
		goto put_out;
	}

	/* new packets are kring->rcur to kring->nr_hwtail, and the bkring
	 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
	 * to push all packets out.
	 */
	bkring->rhead = bkring->rcur = kring->nr_hwtail;

	bkring->nm_sync(bkring, flags);

	/* mark all buffers as released on this ring */
	kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
	/* another call to actually release the buffers */
	error = kring->nm_sync(kring, 0);

	/* The second rxsync may have further advanced hwtail. If this happens,
	 * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
	if (kring->rcur != kring->nr_hwtail) {
		ret = NM_IRQ_RESCHED;
	}
put_out:
	nm_kr_put(kring);

	return error ? error : ret;
}


/* nm_register callback for bwrap */
int
netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_vp_adapter *hostna = &bna->host;
	int error, i;
	enum txrx t;

	nm_prdis("%s %s", na->name, onoff ? "on" : "off");

	if (onoff) {
		/* netmap_do_regif has been called on the bwrap na.
		 * We need to pass the information about the
		 * memory allocator down to the hwna before
		 * putting it in netmap mode
		 */
		hwna->na_lut = na->na_lut;

		if (hostna->na_bdg) {
			/* if the host rings have been attached to switch,
			 * we need to copy the memory allocator information
			 * in the hostna also
			 */
			hostna->up.na_lut = na->na_lut;
		}

	}

	/* pass down the pending ring state information */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(na, t); i++) {
			NMR(hwna, nm_txrx_swap(t))[i]->nr_pending_mode =
				NMR(na, t)[i]->nr_pending_mode;
		}
	}

	/* forward the request to the hwna */
	error = hwna->nm_register(hwna, onoff);
	if (error)
		return error;

	/* copy up the current ring state information */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(na, t); i++) {
			struct netmap_kring *kring = NMR(hwna, nm_txrx_swap(t))[i];
			NMR(na, t)[i]->nr_mode = kring->nr_mode;
		}
	}

	/* impersonate a netmap_vp_adapter */
	netmap_vp_reg(na, onoff);
	if (hostna->na_bdg)
		netmap_vp_reg(&hostna->up, onoff);

	if (onoff) {
		u_int i;
		/* intercept the hwna nm_notify callback on the hw rings */
		for (i = 0; i < hwna->num_rx_rings; i++) {
			hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
			hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
		}
		i = hwna->num_rx_rings; /* for safety */
		/* save the host ring notify unconditionally */
		for (; i < netmap_real_rings(hwna, NR_RX); i++) {
			hwna->rx_rings[i]->save_notify =
				hwna->rx_rings[i]->nm_notify;
			if (hostna->na_bdg) {
				/* also intercept the host ring notify */
				hwna->rx_rings[i]->nm_notify =
					netmap_bwrap_intr_notify;
				na->tx_rings[i]->nm_sync = na->nm_txsync;
			}
		}
		if (na->active_fds == 0)
			na->na_flags |= NAF_NETMAP_ON;
	} else {
		u_int i;

		if (na->active_fds == 0)
			na->na_flags &= ~NAF_NETMAP_ON;

		/* reset all notify callbacks (including host ring) */
		for (i = 0; i < netmap_all_rings(hwna, NR_RX); i++) {
			hwna->rx_rings[i]->nm_notify =
				hwna->rx_rings[i]->save_notify;
			hwna->rx_rings[i]->save_notify = NULL;
		}
		hwna->na_lut.lut = NULL;
		hwna->na_lut.plut = NULL;
		hwna->na_lut.objtotal = 0;
		hwna->na_lut.objsize = 0;

		/* pass ownership of the netmap rings to the hwna */
		for_rx_tx(t) {
			for (i = 0; i < netmap_all_rings(na, t); i++) {
				NMR(na, t)[i]->ring = NULL;
			}
		}
		/* reset the number of host rings to default */
		for_rx_tx(t) {
			nma_set_host_nrings(hwna, t, 1);
		}

	}

	return 0;
}

/* nm_config callback for bwrap */
static int
netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	int error;

	/* Forward the request to the hwna. It may happen that nobody
	 * registered hwna yet, so netmap_mem_get_lut() may have not
	 * been called yet. */
	error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut);
	if (error)
		return error;
	netmap_update_config(hwna);
	/* swap the results and propagate */
	info->num_tx_rings = hwna->num_rx_rings;
	info->num_tx_descs = hwna->num_rx_desc;
	info->num_rx_rings = hwna->num_tx_rings;
	info->num_rx_descs = hwna->num_tx_desc;
	info->rx_buf_maxsize = hwna->rx_buf_maxsize;

	return 0;
}


/* nm_krings_create callback for bwrap */
int
netmap_bwrap_krings_create_common(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_adapter *hostna = &bna->host.up;
	int i, error = 0;
	enum txrx t;

	/* also create the hwna krings */
	error = hwna->nm_krings_create(hwna);
	if (error) {
		return error;
	}

	/* increment the usage counter for all the hwna krings */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
			NMR(hwna, t)[i]->users++;
		}
	}

	/* now create the actual rings */
	error = netmap_mem_rings_create(hwna);
	if (error) {
		goto err_dec_users;
	}

	/* cross-link the netmap rings
	 * The original number of rings comes from hwna,
	 * rx rings on one side equals tx rings on the other.
	 */
	for_rx_tx(t) {
		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
		for (i = 0; i < netmap_all_rings(hwna, r); i++) {
			NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots;
			NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring;
		}
	}

	if (na->na_flags & NAF_HOST_RINGS) {
		/* the hostna rings are the host rings of the bwrap.
		 * The corresponding krings must point back to the
		 * hostna
		 */
		hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
		hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
		for_rx_tx(t) {
			for (i = 0; i < nma_get_nrings(hostna, t); i++) {
				NMR(hostna, t)[i]->na = hostna;
			}
		}
	}

	return 0;

err_dec_users:
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
			NMR(hwna, t)[i]->users--;
		}
	}
	hwna->nm_krings_delete(hwna);
	return error;
}


void
netmap_bwrap_krings_delete_common(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	enum txrx t;
	int i;

	nm_prdis("%s", na->name);

	/* decrement the usage counter for all the hwna krings */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
			NMR(hwna, t)[i]->users--;
		}
	}

	/* delete any netmap rings that are no longer needed */
	netmap_mem_rings_delete(hwna);
	hwna->nm_krings_delete(hwna);
}


/* notify method for the bridge-->hwna direction */
int
netmap_bwrap_notify(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_adapter *hwna = bna->hwna;
	u_int ring_n = kring->ring_id;
	u_int lim = kring->nkr_num_slots - 1;
	struct netmap_kring *hw_kring;
	int error;

	nm_prdis("%s: na %s hwna %s",
			(kring ? kring->name : "NULL!"),
			(na ? na->name : "NULL!"),
			(hwna ? hwna->name : "NULL!"));
	hw_kring = hwna->tx_rings[ring_n];

	if (nm_kr_tryget(hw_kring, 0, NULL)) {
		return ENXIO;
	}

	/* first step: simulate a user wakeup on the rx ring */
	netmap_vp_rxsync(kring, flags);
	nm_prdis("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		na->name, ring_n,
		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
		kring->rhead, kring->rcur, kring->rtail,
		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
	/* second step: the new packets are sent on the tx ring
	 * (which is actually the same ring)
	 */
	hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
	error = hw_kring->nm_sync(hw_kring, flags);
	if (error)
		goto put_out;

	/* third step: now we are back on the rx ring */
	/* claim ownership on all hw owned bufs */
	kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */

	/* fourth step: the user goes to sleep again, causing another rxsync */
	netmap_vp_rxsync(kring, flags);
	nm_prdis("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		na->name, ring_n,
		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
		kring->rhead, kring->rcur, kring->rtail,
		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
put_out:
	nm_kr_put(hw_kring);

	return error ? error : NM_IRQ_COMPLETED;
}


/* nm_bdg_ctl callback for the bwrap.
 * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
 * On attach, it needs to provide a fake netmap_priv_d structure and
 * perform a netmap_do_regif() on the bwrap. This will put both the
 * bwrap and the hwna in netmap mode, with the netmap rings shared
 * and cross linked. Moreover, it will start intercepting interrupts
 * directed to hwna.
 */
static int
netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
{
	struct netmap_priv_d *npriv;
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
	int error = 0;

	if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
		struct nmreq_vale_attach *req =
			(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
		if (req->reg.nr_ringid != 0 ||
			(req->reg.nr_mode != NR_REG_ALL_NIC &&
				req->reg.nr_mode != NR_REG_NIC_SW)) {
			/* We only support attaching all the NIC rings
			 * and/or the host stack. */
			return EINVAL;
		}
		if (NETMAP_OWNED_BY_ANY(na)) {
			return EBUSY;
		}
		if (bna->na_kpriv) {
			/* nothing to do */
			return 0;
		}
		npriv = netmap_priv_new();
		if (npriv == NULL)
			return ENOMEM;
		npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
		error = netmap_do_regif(npriv, na, req->reg.nr_mode,
					req->reg.nr_ringid, req->reg.nr_flags);
		if (error) {
			netmap_priv_delete(npriv);
			return error;
		}
		bna->na_kpriv = npriv;
		na->na_flags |= NAF_BUSY;
	} else {
		if (na->active_fds == 0) /* not registered */
			return EINVAL;
		netmap_priv_delete(bna->na_kpriv);
		bna->na_kpriv = NULL;
		na->na_flags &= ~NAF_BUSY;
	}

	return error;
}

/* attach a bridge wrapper to the 'real' device */
int
netmap_bwrap_attach_common(struct netmap_adapter *na,
		struct netmap_adapter *hwna)
{
	struct netmap_bwrap_adapter *bna;
	struct netmap_adapter *hostna = NULL;
	int error = 0;
	enum txrx t;

	/* make sure the NIC is not already in use */
	if (NETMAP_OWNED_BY_ANY(hwna)) {
		nm_prerr("NIC %s busy, cannot attach to bridge", hwna->name);
		return EBUSY;
	}

	bna = (struct netmap_bwrap_adapter *)na;
	/* make bwrap ifp point to the real ifp */
	na->ifp = hwna->ifp;
	if_ref(na->ifp);
	na->na_private = bna;
	/* fill the ring data for the bwrap adapter with rx/tx meanings
	 * swapped. The real cross-linking will be done during register,
	 * when all the krings will have been created.
	 */
	for_rx_tx(t) {
		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
		nma_set_nrings(na, t, nma_get_nrings(hwna, r));
		nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
	}
	na->nm_dtor = netmap_bwrap_dtor;
	na->nm_config = netmap_bwrap_config;
	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
	na->pdev = hwna->pdev;
	na->nm_mem = netmap_mem_get(hwna->nm_mem);
	na->virt_hdr_len = hwna->virt_hdr_len;
	na->rx_buf_maxsize = hwna->rx_buf_maxsize;

	bna->hwna = hwna;
	netmap_adapter_get(hwna);
	hwna->na_private = bna; /* weak reference */
	bna->saved_na_vp = hwna->na_vp;
	hwna->na_vp = &bna->up;
	bna->up.up.na_vp = &(bna->up);

	if (hwna->na_flags & NAF_HOST_RINGS) {
		if (hwna->na_flags & NAF_SW_ONLY)
			na->na_flags |= NAF_SW_ONLY;
		na->na_flags |= NAF_HOST_RINGS;
		hostna = &bna->host.up;

		/* limit the number of host rings to that of hw */
		nm_bound_var(&hostna->num_tx_rings, 1, 1,
				nma_get_nrings(hwna, NR_TX), NULL);
		nm_bound_var(&hostna->num_rx_rings, 1, 1,
				nma_get_nrings(hwna, NR_RX), NULL);

		snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name);
		hostna->ifp = hwna->ifp;
		for_rx_tx(t) {
			enum txrx r = nm_txrx_swap(t);
			u_int nr = nma_get_nrings(hostna, t);

			nma_set_nrings(hostna, t, nr);
			nma_set_host_nrings(na, t, nr);
			if (nma_get_host_nrings(hwna, t) < nr) {
				nma_set_host_nrings(hwna, t, nr);
			}
			nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
		}
		// hostna->nm_txsync = netmap_bwrap_host_txsync;
		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
		hostna->nm_mem = netmap_mem_get(na->nm_mem);
		hostna->na_private = bna;
		hostna->na_vp = &bna->up;
		na->na_hostvp = hwna->na_hostvp =
			hostna->na_hostvp = &bna->host;
		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
		hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
	}
	if (hwna->na_flags & NAF_MOREFRAG)
		na->na_flags |= NAF_MOREFRAG;

	nm_prdis("%s<->%s txr %d txd %d rxr %d rxd %d",
		na->name, na->ifp->if_xname,
		na->num_tx_rings, na->num_tx_desc,
		na->num_rx_rings, na->num_rx_desc);

	error = netmap_attach_common(na);
	if (error) {
		goto err_put;
	}
	hwna->na_flags |= NAF_BUSY;
	return 0;

err_put:
	hwna->na_vp = hwna->na_hostvp = NULL;
	netmap_adapter_put(hwna);
	return error;

}

struct nm_bridge *
netmap_init_bridges2(u_int n)
{
	int i;
	struct nm_bridge *b;

	b = nm_os_malloc(sizeof(struct nm_bridge) * n);
	if (b == NULL)
		return NULL;
	for (i = 0; i < n; i++)
		BDG_RWINIT(&b[i]);
	return b;
}

void
netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
{
	int i;

	if (b == NULL)
		return;

	for (i = 0; i < n; i++)
		BDG_RWDESTROY(&b[i]);
	nm_os_free(b);
}

int
netmap_init_bridges(void)
{
#ifdef CONFIG_NET_NS
	return netmap_bns_register();
#else
	nm_bridges = netmap_init_bridges2(NM_BRIDGES);
	if (nm_bridges == NULL)
		return ENOMEM;
	return 0;
#endif
}

void
netmap_uninit_bridges(void)
{
#ifdef CONFIG_NET_NS
	netmap_bns_unregister();
#else
	netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
#endif
}
