netmap_pipe.c revision 285697
1/*
2 * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *   1. Redistributions of source code must retain the above copyright
8 *      notice, this list of conditions and the following disclaimer.
9 *   2. Redistributions in binary form must reproduce the above copyright
10 *      notice, this list of conditions and the following disclaimer in the
11 *      documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26/* $FreeBSD: head/sys/dev/netmap/netmap_pipe.c 285697 2015-07-19 18:05:49Z luigi $ */
27
28#if defined(__FreeBSD__)
29#include <sys/cdefs.h> /* prerequisite */
30
31#include <sys/types.h>
32#include <sys/errno.h>
33#include <sys/param.h>	/* defines used in kernel.h */
34#include <sys/kernel.h>	/* types used in module initialization */
35#include <sys/malloc.h>
36#include <sys/poll.h>
37#include <sys/lock.h>
38#include <sys/rwlock.h>
39#include <sys/selinfo.h>
40#include <sys/sysctl.h>
41#include <sys/socket.h> /* sockaddrs */
42#include <net/if.h>
43#include <net/if_var.h>
44#include <machine/bus.h>	/* bus_dmamap_* */
45#include <sys/refcount.h>
46
47
48#elif defined(linux)
49
50#include "bsd_glue.h"
51
52#elif defined(__APPLE__)
53
54#warning OSX support is only partial
55#include "osx_glue.h"
56
57#else
58
59#error	Unsupported platform
60
61#endif /* unsupported */
62
63/*
64 * common headers
65 */
66
67#include <net/netmap.h>
68#include <dev/netmap/netmap_kern.h>
69#include <dev/netmap/netmap_mem2.h>
70
71#ifdef WITH_PIPES
72
73#define NM_PIPE_MAXSLOTS	4096
74
75int netmap_default_pipes = 0; /* ignored, kept for compatibility */
76SYSCTL_DECL(_dev_netmap);
77SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0 , "");
78
79/* allocate the pipe array in the parent adapter */
80static int
81nm_pipe_alloc(struct netmap_adapter *na, u_int npipes)
82{
83	size_t len;
84	struct netmap_pipe_adapter **npa;
85
86	if (npipes <= na->na_max_pipes)
87		/* we already have more entries that requested */
88		return 0;
89
90	if (npipes < na->na_next_pipe || npipes > NM_MAXPIPES)
91		return EINVAL;
92
93        len = sizeof(struct netmap_pipe_adapter *) * npipes;
94	npa = realloc(na->na_pipes, len, M_DEVBUF, M_NOWAIT | M_ZERO);
95	if (npa == NULL)
96		return ENOMEM;
97
98	na->na_pipes = npa;
99	na->na_max_pipes = npipes;
100
101	return 0;
102}
103
104/* deallocate the parent array in the parent adapter */
105void
106netmap_pipe_dealloc(struct netmap_adapter *na)
107{
108	if (na->na_pipes) {
109		if (na->na_next_pipe > 0) {
110			D("freeing not empty pipe array for %s (%d dangling pipes)!", na->name,
111					na->na_next_pipe);
112		}
113		free(na->na_pipes, M_DEVBUF);
114		na->na_pipes = NULL;
115		na->na_max_pipes = 0;
116		na->na_next_pipe = 0;
117	}
118}
119
120/* find a pipe endpoint with the given id among the parent's pipes */
121static struct netmap_pipe_adapter *
122netmap_pipe_find(struct netmap_adapter *parent, u_int pipe_id)
123{
124	int i;
125	struct netmap_pipe_adapter *na;
126
127	for (i = 0; i < parent->na_next_pipe; i++) {
128		na = parent->na_pipes[i];
129		if (na->id == pipe_id) {
130			return na;
131		}
132	}
133	return NULL;
134}
135
136/* add a new pipe endpoint to the parent array */
137static int
138netmap_pipe_add(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
139{
140	if (parent->na_next_pipe >= parent->na_max_pipes) {
141		u_int npipes = parent->na_max_pipes ?  2*parent->na_max_pipes : 2;
142		int error = nm_pipe_alloc(parent, npipes);
143		if (error)
144			return error;
145	}
146
147	parent->na_pipes[parent->na_next_pipe] = na;
148	na->parent_slot = parent->na_next_pipe;
149	parent->na_next_pipe++;
150	return 0;
151}
152
153/* remove the given pipe endpoint from the parent array */
154static void
155netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
156{
157	u_int n;
158	n = --parent->na_next_pipe;
159	if (n != na->parent_slot) {
160		struct netmap_pipe_adapter **p =
161			&parent->na_pipes[na->parent_slot];
162		*p = parent->na_pipes[n];
163		(*p)->parent_slot = na->parent_slot;
164	}
165	parent->na_pipes[n] = NULL;
166}
167
168static int
169netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
170{
171        struct netmap_kring *rxkring = txkring->pipe;
172        u_int limit; /* slots to transfer */
173        u_int j, k, lim_tx = txkring->nkr_num_slots - 1,
174                lim_rx = rxkring->nkr_num_slots - 1;
175        int m, busy;
176
177        ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
178        ND(2, "before: hwcur %d hwtail %d cur %d head %d tail %d", txkring->nr_hwcur, txkring->nr_hwtail,
179                txkring->rcur, txkring->rhead, txkring->rtail);
180
181        j = rxkring->nr_hwtail; /* RX */
182        k = txkring->nr_hwcur;  /* TX */
183        m = txkring->rhead - txkring->nr_hwcur; /* new slots */
184        if (m < 0)
185                m += txkring->nkr_num_slots;
186        limit = m;
187        m = lim_rx; /* max avail space on destination */
188        busy = j - rxkring->nr_hwcur; /* busy slots */
189	if (busy < 0)
190		busy += rxkring->nkr_num_slots;
191	m -= busy; /* subtract busy slots */
192        ND(2, "m %d limit %d", m, limit);
193        if (m < limit)
194                limit = m;
195
196	if (limit == 0) {
197		/* either the rxring is full, or nothing to send */
198		return 0;
199	}
200
201        while (limit-- > 0) {
202                struct netmap_slot *rs = &rxkring->save_ring->slot[j];
203                struct netmap_slot *ts = &txkring->ring->slot[k];
204                struct netmap_slot tmp;
205
206                /* swap the slots */
207                tmp = *rs;
208                *rs = *ts;
209                *ts = tmp;
210
211                /* report the buffer change */
212		ts->flags |= NS_BUF_CHANGED;
213		rs->flags |= NS_BUF_CHANGED;
214
215                j = nm_next(j, lim_rx);
216                k = nm_next(k, lim_tx);
217        }
218
219        mb(); /* make sure the slots are updated before publishing them */
220        rxkring->nr_hwtail = j;
221        txkring->nr_hwcur = k;
222        txkring->nr_hwtail = nm_prev(k, lim_tx);
223
224        ND(2, "after: hwcur %d hwtail %d cur %d head %d tail %d j %d", txkring->nr_hwcur, txkring->nr_hwtail,
225                txkring->rcur, txkring->rhead, txkring->rtail, j);
226
227        mb(); /* make sure rxkring->nr_hwtail is updated before notifying */
228        rxkring->nm_notify(rxkring, 0);
229
230	return 0;
231}
232
233static int
234netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
235{
236        struct netmap_kring *txkring = rxkring->pipe;
237	uint32_t oldhwcur = rxkring->nr_hwcur;
238
239        ND("%s %x <- %s", rxkring->name, flags, txkring->name);
240        rxkring->nr_hwcur = rxkring->rhead; /* recover user-relased slots */
241        ND(5, "hwcur %d hwtail %d cur %d head %d tail %d", rxkring->nr_hwcur, rxkring->nr_hwtail,
242                rxkring->rcur, rxkring->rhead, rxkring->rtail);
243        mb(); /* paired with the first mb() in txsync */
244
245	if (oldhwcur != rxkring->nr_hwcur) {
246		/* we have released some slots, notify the other end */
247		mb(); /* make sure nr_hwcur is updated before notifying */
248		txkring->nm_notify(txkring, 0);
249	}
250        return 0;
251}
252
/* Pipe endpoints are created and destroyed together, so that endpoints do not
 * have to check for the existence of their peer at each ?xsync.
255 *
256 * To play well with the existing netmap infrastructure (refcounts etc.), we
257 * adopt the following strategy:
258 *
259 * 1) The first endpoint that is created also creates the other endpoint and
260 * grabs a reference to it.
261 *
262 *    state A)  user1 --> endpoint1 --> endpoint2
263 *
264 * 2) If, starting from state A, endpoint2 is then registered, endpoint1 gives
265 * its reference to the user:
266 *
267 *    state B)  user1 --> endpoint1     endpoint2 <--- user2
268 *
269 * 3) Assume that, starting from state B endpoint2 is closed. In the unregister
270 * callback endpoint2 notes that endpoint1 is still active and adds a reference
271 * from endpoint1 to itself. When user2 then releases her own reference,
272 * endpoint2 is not destroyed and we are back to state A. A symmetrical state
273 * would be reached if endpoint1 were released instead.
274 *
275 * 4) If, starting from state A, endpoint1 is closed, the destructor notes that
276 * it owns a reference to endpoint2 and releases it.
277 *
278 * Something similar goes on for the creation and destruction of the krings.
279 */
280
281
282/* netmap_pipe_krings_delete.
283 *
284 * There are two cases:
285 *
286 * 1) state is
287 *
288 *        usr1 --> e1 --> e2
289 *
290 *    and we are e1. We have to create both sets
291 *    of krings.
292 *
293 * 2) state is
294 *
295 *        usr1 --> e1 --> e2
296 *
297 *    and we are e2. e1 is certainly registered and our
298 *    krings already exist, but they may be hidden.
299 */
300static int
301netmap_pipe_krings_create(struct netmap_adapter *na)
302{
303	struct netmap_pipe_adapter *pna =
304		(struct netmap_pipe_adapter *)na;
305	struct netmap_adapter *ona = &pna->peer->up;
306	int error = 0;
307	enum txrx t;
308
309	if (pna->peer_ref) {
310		int i;
311
312		/* case 1) above */
313		ND("%p: case 1, create everything", na);
314		error = netmap_krings_create(na, 0);
315		if (error)
316			goto err;
317
318		/* we also create all the rings, since we need to
319                 * update the save_ring pointers.
320                 * netmap_mem_rings_create (called by our caller)
321                 * will not create the rings again
322                 */
323
324		error = netmap_mem_rings_create(na);
325		if (error)
326			goto del_krings1;
327
328		/* update our hidden ring pointers */
329		for_rx_tx(t) {
330			for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
331				NMR(na, t)[i].save_ring = NMR(na, t)[i].ring;
332		}
333
334		/* now, create krings and rings of the other end */
335		error = netmap_krings_create(ona, 0);
336		if (error)
337			goto del_rings1;
338
339		error = netmap_mem_rings_create(ona);
340		if (error)
341			goto del_krings2;
342
343		for_rx_tx(t) {
344			for (i = 0; i < nma_get_nrings(ona, t) + 1; i++)
345				NMR(ona, t)[i].save_ring = NMR(ona, t)[i].ring;
346		}
347
348		/* cross link the krings */
349		for_rx_tx(t) {
350			enum txrx r= nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
351			for (i = 0; i < nma_get_nrings(na, t); i++) {
352				NMR(na, t)[i].pipe = NMR(&pna->peer->up, r) + i;
353				NMR(&pna->peer->up, r)[i].pipe = NMR(na, t) + i;
354			}
355		}
356	} else {
357		int i;
358		/* case 2) above */
359		/* recover the hidden rings */
360		ND("%p: case 2, hidden rings", na);
361		for_rx_tx(t) {
362			for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
363				NMR(na, t)[i].ring = NMR(na, t)[i].save_ring;
364		}
365	}
366	return 0;
367
368del_krings2:
369	netmap_krings_delete(ona);
370del_rings1:
371	netmap_mem_rings_delete(na);
372del_krings1:
373	netmap_krings_delete(na);
374err:
375	return error;
376}
377
378/* netmap_pipe_reg.
379 *
380 * There are two cases on registration (onoff==1)
381 *
382 * 1.a) state is
383 *
384 *        usr1 --> e1 --> e2
385 *
386 *      and we are e1. Nothing special to do.
387 *
388 * 1.b) state is
389 *
390 *        usr1 --> e1 --> e2 <-- usr2
391 *
392 *      and we are e2. Drop the ref e1 is holding.
393 *
394 *  There are two additional cases on unregister (onoff==0)
395 *
396 *  2.a) state is
397 *
398 *         usr1 --> e1 --> e2
399 *
400 *       and we are e1. Nothing special to do, e2 will
401 *       be cleaned up by the destructor of e1.
402 *
403 *  2.b) state is
404 *
405 *         usr1 --> e1     e2 <-- usr2
406 *
407 *       and we are either e1 or e2. Add a ref from the
408 *       other end and hide our rings.
409 */
410static int
411netmap_pipe_reg(struct netmap_adapter *na, int onoff)
412{
413	struct netmap_pipe_adapter *pna =
414		(struct netmap_pipe_adapter *)na;
415	enum txrx t;
416
417	ND("%p: onoff %d", na, onoff);
418	if (onoff) {
419		na->na_flags |= NAF_NETMAP_ON;
420	} else {
421		na->na_flags &= ~NAF_NETMAP_ON;
422	}
423	if (pna->peer_ref) {
424		ND("%p: case 1.a or 2.a, nothing to do", na);
425		return 0;
426	}
427	if (onoff) {
428		ND("%p: case 1.b, drop peer", na);
429		pna->peer->peer_ref = 0;
430		netmap_adapter_put(na);
431	} else {
432		int i;
433		ND("%p: case 2.b, grab peer", na);
434		netmap_adapter_get(na);
435		pna->peer->peer_ref = 1;
436		/* hide our rings from netmap_mem_rings_delete */
437		for_rx_tx(t) {
438			for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
439				NMR(na, t)[i].ring = NULL;
440			}
441		}
442	}
443	return 0;
444}
445
446/* netmap_pipe_krings_delete.
447 *
448 * There are two cases:
449 *
450 * 1) state is
451 *
452 *                usr1 --> e1 --> e2
453 *
454 *    and we are e1 (e2 is not registered, so krings_delete cannot be
455 *    called on it);
456 *
457 * 2) state is
458 *
459 *                usr1 --> e1     e2 <-- usr2
460 *
461 *    and we are either e1 or e2.
462 *
463 * In the former case we have to also delete the krings of e2;
464 * in the latter case we do nothing (note that our krings
465 * have already been hidden in the unregister callback).
466 */
467static void
468netmap_pipe_krings_delete(struct netmap_adapter *na)
469{
470	struct netmap_pipe_adapter *pna =
471		(struct netmap_pipe_adapter *)na;
472	struct netmap_adapter *ona; /* na of the other end */
473	int i;
474	enum txrx t;
475
476	if (!pna->peer_ref) {
477		ND("%p: case 2, kept alive by peer",  na);
478		return;
479	}
480	/* case 1) above */
481	ND("%p: case 1, deleting everyhing", na);
482	netmap_krings_delete(na); /* also zeroes tx_rings etc. */
483	/* restore the ring to be deleted on the peer */
484	ona = &pna->peer->up;
485	if (ona->tx_rings == NULL) {
486		/* already deleted, we must be on an
487                 * cleanup-after-error path */
488		return;
489	}
490	for_rx_tx(t) {
491		for (i = 0; i < nma_get_nrings(ona, t) + 1; i++)
492			NMR(ona, t)[i].ring = NMR(ona, t)[i].save_ring;
493	}
494	netmap_mem_rings_delete(ona);
495	netmap_krings_delete(ona);
496}
497
498
499static void
500netmap_pipe_dtor(struct netmap_adapter *na)
501{
502	struct netmap_pipe_adapter *pna =
503		(struct netmap_pipe_adapter *)na;
504	ND("%p", na);
505	if (pna->peer_ref) {
506		ND("%p: clean up peer", na);
507		pna->peer_ref = 0;
508		netmap_adapter_put(&pna->peer->up);
509	}
510	if (pna->role == NR_REG_PIPE_MASTER)
511		netmap_pipe_remove(pna->parent, pna);
512	netmap_adapter_put(pna->parent);
513	pna->parent = NULL;
514}
515
516int
517netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
518{
519	struct nmreq pnmr;
520	struct netmap_adapter *pna; /* parent adapter */
521	struct netmap_pipe_adapter *mna, *sna, *req;
522	u_int pipe_id;
523	int role = nmr->nr_flags & NR_REG_MASK;
524	int error;
525
526	ND("flags %x", nmr->nr_flags);
527
528	if (role != NR_REG_PIPE_MASTER && role != NR_REG_PIPE_SLAVE) {
529		ND("not a pipe");
530		return 0;
531	}
532	role = nmr->nr_flags & NR_REG_MASK;
533
534	/* first, try to find the parent adapter */
535	bzero(&pnmr, sizeof(pnmr));
536	memcpy(&pnmr.nr_name, nmr->nr_name, IFNAMSIZ);
537	/* pass to parent the requested number of pipes */
538	pnmr.nr_arg1 = nmr->nr_arg1;
539	error = netmap_get_na(&pnmr, &pna, create);
540	if (error) {
541		ND("parent lookup failed: %d", error);
542		return error;
543	}
544	ND("found parent: %s", na->name);
545
546	if (NETMAP_OWNED_BY_KERN(pna)) {
547		ND("parent busy");
548		error = EBUSY;
549		goto put_out;
550	}
551
552	/* next, lookup the pipe id in the parent list */
553	req = NULL;
554	pipe_id = nmr->nr_ringid & NETMAP_RING_MASK;
555	mna = netmap_pipe_find(pna, pipe_id);
556	if (mna) {
557		if (mna->role == role) {
558			ND("found %d directly at %d", pipe_id, mna->parent_slot);
559			req = mna;
560		} else {
561			ND("found %d indirectly at %d", pipe_id, mna->parent_slot);
562			req = mna->peer;
563		}
564		/* the pipe we have found already holds a ref to the parent,
565                 * so we need to drop the one we got from netmap_get_na()
566                 */
567		netmap_adapter_put(pna);
568		goto found;
569	}
570	ND("pipe %d not found, create %d", pipe_id, create);
571	if (!create) {
572		error = ENODEV;
573		goto put_out;
574	}
575	/* we create both master and slave.
576         * The endpoint we were asked for holds a reference to
577         * the other one.
578         */
579	mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
580	if (mna == NULL) {
581		error = ENOMEM;
582		goto put_out;
583	}
584	snprintf(mna->up.name, sizeof(mna->up.name), "%s{%d", pna->name, pipe_id);
585
586	mna->id = pipe_id;
587	mna->role = NR_REG_PIPE_MASTER;
588	mna->parent = pna;
589
590	mna->up.nm_txsync = netmap_pipe_txsync;
591	mna->up.nm_rxsync = netmap_pipe_rxsync;
592	mna->up.nm_register = netmap_pipe_reg;
593	mna->up.nm_dtor = netmap_pipe_dtor;
594	mna->up.nm_krings_create = netmap_pipe_krings_create;
595	mna->up.nm_krings_delete = netmap_pipe_krings_delete;
596	mna->up.nm_mem = pna->nm_mem;
597	mna->up.na_lut = pna->na_lut;
598
599	mna->up.num_tx_rings = 1;
600	mna->up.num_rx_rings = 1;
601	mna->up.num_tx_desc = nmr->nr_tx_slots;
602	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
603			1, NM_PIPE_MAXSLOTS, NULL);
604	mna->up.num_rx_desc = nmr->nr_rx_slots;
605	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
606			1, NM_PIPE_MAXSLOTS, NULL);
607	error = netmap_attach_common(&mna->up);
608	if (error)
609		goto free_mna;
610	/* register the master with the parent */
611	error = netmap_pipe_add(pna, mna);
612	if (error)
613		goto free_mna;
614
615	/* create the slave */
616	sna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
617	if (sna == NULL) {
618		error = ENOMEM;
619		goto unregister_mna;
620	}
621	/* most fields are the same, copy from master and then fix */
622	*sna = *mna;
623	snprintf(sna->up.name, sizeof(sna->up.name), "%s}%d", pna->name, pipe_id);
624	sna->role = NR_REG_PIPE_SLAVE;
625	error = netmap_attach_common(&sna->up);
626	if (error)
627		goto free_sna;
628
629	/* join the two endpoints */
630	mna->peer = sna;
631	sna->peer = mna;
632
633	/* we already have a reference to the parent, but we
634         * need another one for the other endpoint we created
635         */
636	netmap_adapter_get(pna);
637
638	if (role == NR_REG_PIPE_MASTER) {
639		req = mna;
640		mna->peer_ref = 1;
641		netmap_adapter_get(&sna->up);
642	} else {
643		req = sna;
644		sna->peer_ref = 1;
645		netmap_adapter_get(&mna->up);
646	}
647	ND("created master %p and slave %p", mna, sna);
648found:
649
650	ND("pipe %d %s at %p", pipe_id,
651		(req->role == NR_REG_PIPE_MASTER ? "master" : "slave"), req);
652	*na = &req->up;
653	netmap_adapter_get(*na);
654
655	/* write the configuration back */
656	nmr->nr_tx_rings = req->up.num_tx_rings;
657	nmr->nr_rx_rings = req->up.num_rx_rings;
658	nmr->nr_tx_slots = req->up.num_tx_desc;
659	nmr->nr_rx_slots = req->up.num_rx_desc;
660
661	/* keep the reference to the parent.
662         * It will be released by the req destructor
663         */
664
665	return 0;
666
667free_sna:
668	free(sna, M_DEVBUF);
669unregister_mna:
670	netmap_pipe_remove(pna, mna);
671free_mna:
672	free(mna, M_DEVBUF);
673put_out:
674	netmap_adapter_put(pna);
675	return error;
676}
677
678
679#endif /* WITH_PIPES */
680