1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (C) 2014-2018 Giuseppe Lettieri
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *   1. Redistributions of source code must retain the above copyright
11 *      notice, this list of conditions and the following disclaimer.
12 *   2. Redistributions in binary form must reproduce the above copyright
13 *      notice, this list of conditions and the following disclaimer in the
14 *      documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29/* $FreeBSD$ */
30
31#if defined(__FreeBSD__)
32#include <sys/cdefs.h> /* prerequisite */
33
34#include <sys/types.h>
35#include <sys/errno.h>
36#include <sys/param.h>	/* defines used in kernel.h */
37#include <sys/kernel.h>	/* types used in module initialization */
38#include <sys/malloc.h>
39#include <sys/poll.h>
40#include <sys/lock.h>
41#include <sys/rwlock.h>
42#include <sys/selinfo.h>
43#include <sys/sysctl.h>
44#include <sys/socket.h> /* sockaddrs */
45#include <net/if.h>
46#include <net/if_var.h>
47#include <machine/bus.h>	/* bus_dmamap_* */
48#include <sys/refcount.h>
49
50
51#elif defined(linux)
52
53#include "bsd_glue.h"
54
55#elif defined(__APPLE__)
56
57#warning OSX support is only partial
58#include "osx_glue.h"
59
60#elif defined(_WIN32)
61#include "win_glue.h"
62
63#else
64
65#error	Unsupported platform
66
67#endif /* unsupported */
68
69/*
70 * common headers
71 */
72
73#include <net/netmap.h>
74#include <dev/netmap/netmap_kern.h>
75#include <dev/netmap/netmap_mem2.h>
76
77#ifdef WITH_PIPES
78
/* Limits handed to nm_bound_var() when the ring geometry of a new pipe is
 * set up in netmap_get_pipe_na() (presumably the maximum allowed number of
 * slots per ring and of rings, respectively -- confirm against
 * nm_bound_var()).
 */
#define NM_PIPE_MAXSLOTS	4096
#define NM_PIPE_MAXRINGS	256

/* This variable is not consulted by the pipe code in this file; it only
 * backs the default_pipes sysctl declared below.
 */
static int netmap_default_pipes = 0; /* ignored, kept for compatibility */
SYSBEGIN(vars_pipes);
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW,
		&netmap_default_pipes, 0, "For compatibility only");
SYSEND;
88
89/* allocate the pipe array in the parent adapter */
90static int
91nm_pipe_alloc(struct netmap_adapter *na, u_int npipes)
92{
93	size_t old_len, len;
94	struct netmap_pipe_adapter **npa;
95
96	if (npipes <= na->na_max_pipes)
97		/* we already have more entries that requested */
98		return 0;
99
100	if (npipes < na->na_next_pipe || npipes > NM_MAXPIPES)
101		return EINVAL;
102
103	old_len = sizeof(struct netmap_pipe_adapter *)*na->na_max_pipes;
104	len = sizeof(struct netmap_pipe_adapter *) * npipes;
105	npa = nm_os_realloc(na->na_pipes, len, old_len);
106	if (npa == NULL)
107		return ENOMEM;
108
109	na->na_pipes = npa;
110	na->na_max_pipes = npipes;
111
112	return 0;
113}
114
115/* deallocate the parent array in the parent adapter */
116void
117netmap_pipe_dealloc(struct netmap_adapter *na)
118{
119	if (na->na_pipes) {
120		if (na->na_next_pipe > 0) {
121			nm_prerr("freeing not empty pipe array for %s (%d dangling pipes)!",
122			    na->name, na->na_next_pipe);
123		}
124		nm_os_free(na->na_pipes);
125		na->na_pipes = NULL;
126		na->na_max_pipes = 0;
127		na->na_next_pipe = 0;
128	}
129}
130
131/* find a pipe endpoint with the given id among the parent's pipes */
132static struct netmap_pipe_adapter *
133netmap_pipe_find(struct netmap_adapter *parent, const char *pipe_id)
134{
135	int i;
136	struct netmap_pipe_adapter *na;
137
138	for (i = 0; i < parent->na_next_pipe; i++) {
139		const char *na_pipe_id;
140		na = parent->na_pipes[i];
141		na_pipe_id = strrchr(na->up.name,
142			na->role == NM_PIPE_ROLE_MASTER ? '{' : '}');
143		KASSERT(na_pipe_id != NULL, ("Invalid pipe name"));
144		++na_pipe_id;
145		if (!strcmp(na_pipe_id, pipe_id)) {
146			return na;
147		}
148	}
149	return NULL;
150}
151
152/* add a new pipe endpoint to the parent array */
153static int
154netmap_pipe_add(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
155{
156	if (parent->na_next_pipe >= parent->na_max_pipes) {
157		u_int npipes = parent->na_max_pipes ?  2*parent->na_max_pipes : 2;
158		int error = nm_pipe_alloc(parent, npipes);
159		if (error)
160			return error;
161	}
162
163	parent->na_pipes[parent->na_next_pipe] = na;
164	na->parent_slot = parent->na_next_pipe;
165	parent->na_next_pipe++;
166	return 0;
167}
168
169/* remove the given pipe endpoint from the parent array */
170static void
171netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
172{
173	u_int n;
174	n = --parent->na_next_pipe;
175	if (n != na->parent_slot) {
176		struct netmap_pipe_adapter **p =
177			&parent->na_pipes[na->parent_slot];
178		*p = parent->na_pipes[n];
179		(*p)->parent_slot = na->parent_slot;
180	}
181	parent->na_pipes[n] = NULL;
182}
183
184int
185netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
186{
187	struct netmap_kring *rxkring = txkring->pipe;
188	u_int k, lim = txkring->nkr_num_slots - 1, nk;
189	int m; /* slots to transfer */
190	int complete; /* did we see a complete packet ? */
191	struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring;
192
193	nm_prdis("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
194	nm_prdis(20, "TX before: hwcur %d hwtail %d cur %d head %d tail %d",
195		txkring->nr_hwcur, txkring->nr_hwtail,
196		txkring->rcur, txkring->rhead, txkring->rtail);
197
198	/* update the hwtail */
199	txkring->nr_hwtail = txkring->pipe_tail;
200
201	m = txkring->rhead - txkring->nr_hwcur; /* new slots */
202	if (m < 0)
203		m += txkring->nkr_num_slots;
204
205	if (m == 0) {
206		/* nothing to send */
207		return 0;
208	}
209
210	for (k = txkring->nr_hwcur, nk = lim + 1, complete = 0; m;
211			m--, k = nm_next(k, lim), nk = (complete ? k : nk)) {
212		struct netmap_slot *rs = &rxring->slot[k];
213		struct netmap_slot *ts = &txring->slot[k];
214
215		*rs = *ts;
216		if (ts->flags & NS_BUF_CHANGED) {
217			ts->flags &= ~NS_BUF_CHANGED;
218		}
219		complete = !(ts->flags & NS_MOREFRAG);
220	}
221
222	txkring->nr_hwcur = k;
223
224	nm_prdis(20, "TX after : hwcur %d hwtail %d cur %d head %d tail %d k %d",
225		txkring->nr_hwcur, txkring->nr_hwtail,
226		txkring->rcur, txkring->rhead, txkring->rtail, k);
227
228	if (likely(nk <= lim)) {
229		mb(); /* make sure the slots are updated before publishing them */
230		rxkring->pipe_tail = nk; /* only publish complete packets */
231		rxkring->nm_notify(rxkring, 0);
232	}
233
234	return 0;
235}
236
237int
238netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
239{
240	struct netmap_kring *txkring = rxkring->pipe;
241	u_int k, lim = rxkring->nkr_num_slots - 1;
242	int m; /* slots to release */
243	struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring;
244
245	nm_prdis("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
246	nm_prdis(20, "RX before: hwcur %d hwtail %d cur %d head %d tail %d",
247		rxkring->nr_hwcur, rxkring->nr_hwtail,
248		rxkring->rcur, rxkring->rhead, rxkring->rtail);
249
250	/* update the hwtail */
251	rxkring->nr_hwtail = rxkring->pipe_tail;
252
253	m = rxkring->rhead - rxkring->nr_hwcur; /* released slots */
254	if (m < 0)
255		m += rxkring->nkr_num_slots;
256
257	if (m == 0) {
258		/* nothing to release */
259		return 0;
260	}
261
262	for (k = rxkring->nr_hwcur; m; m--, k = nm_next(k, lim)) {
263		struct netmap_slot *rs = &rxring->slot[k];
264		struct netmap_slot *ts = &txring->slot[k];
265
266		if (rs->flags & NS_BUF_CHANGED) {
267			/* copy the slot and report the buffer change */
268			*ts = *rs;
269			rs->flags &= ~NS_BUF_CHANGED;
270		}
271	}
272
273	mb(); /* make sure the slots are updated before publishing them */
274	txkring->pipe_tail = nm_prev(k, lim);
275	rxkring->nr_hwcur = k;
276
277	nm_prdis(20, "RX after : hwcur %d hwtail %d cur %d head %d tail %d k %d",
278		rxkring->nr_hwcur, rxkring->nr_hwtail,
279		rxkring->rcur, rxkring->rhead, rxkring->rtail, k);
280
281	txkring->nm_notify(txkring, 0);
282
283	return 0;
284}
285
/* Pipe endpoints are created and destroyed together, so that endpoints do not
287 * have to check for the existence of their peer at each ?xsync.
288 *
289 * To play well with the existing netmap infrastructure (refcounts etc.), we
290 * adopt the following strategy:
291 *
292 * 1) The first endpoint that is created also creates the other endpoint and
293 * grabs a reference to it.
294 *
295 *    state A)  user1 --> endpoint1 --> endpoint2
296 *
297 * 2) If, starting from state A, endpoint2 is then registered, endpoint1 gives
298 * its reference to the user:
299 *
300 *    state B)  user1 --> endpoint1     endpoint2 <--- user2
301 *
302 * 3) Assume that, starting from state B endpoint2 is closed. In the unregister
303 * callback endpoint2 notes that endpoint1 is still active and adds a reference
304 * from endpoint1 to itself. When user2 then releases her own reference,
305 * endpoint2 is not destroyed and we are back to state A. A symmetrical state
306 * would be reached if endpoint1 were released instead.
307 *
308 * 4) If, starting from state A, endpoint1 is closed, the destructor notes that
309 * it owns a reference to endpoint2 and releases it.
310 *
311 * Something similar goes on for the creation and destruction of the krings.
312 */
313
314
315int netmap_pipe_krings_create_both(struct netmap_adapter *na,
316				  struct netmap_adapter *ona)
317{
318	enum txrx t;
319	int error;
320	int i;
321
322	/* case 1) below */
323	nm_prdis("%p: case 1, create both ends", na);
324	error = netmap_krings_create(na, 0);
325	if (error)
326		return error;
327
328	/* create the krings of the other end */
329	error = netmap_krings_create(ona, 0);
330	if (error)
331		goto del_krings1;
332
333	/* cross link the krings and initialize the pipe_tails */
334	for_rx_tx(t) {
335		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
336		for (i = 0; i < nma_get_nrings(na, t); i++) {
337			struct netmap_kring *k1 = NMR(na, t)[i],
338					    *k2 = NMR(ona, r)[i];
339			k1->pipe = k2;
340			k2->pipe = k1;
341			/* mark all peer-adapter rings as fake */
342			k2->nr_kflags |= NKR_FAKERING;
343			/* init tails */
344			k1->pipe_tail = k1->nr_hwtail;
345			k2->pipe_tail = k2->nr_hwtail;
346		}
347	}
348
349	return 0;
350
351del_krings1:
352	netmap_krings_delete(na);
353	return error;
354}
355
356/* netmap_pipe_krings_create.
357 *
358 * There are two cases:
359 *
360 * 1) state is
361 *
362 *        usr1 --> e1 --> e2
363 *
364 *    and we are e1. We have to create both sets
365 *    of krings.
366 *
367 * 2) state is
368 *
369 *        usr1 --> e1 --> e2
370 *
371 *    and we are e2. e1 is certainly registered and our
372 *    krings already exist. Nothing to do.
373 */
374static int
375netmap_pipe_krings_create(struct netmap_adapter *na)
376{
377	struct netmap_pipe_adapter *pna =
378		(struct netmap_pipe_adapter *)na;
379	struct netmap_adapter *ona = &pna->peer->up;
380
381	if (pna->peer_ref)
382		return netmap_pipe_krings_create_both(na, ona);
383
384	return 0;
385}
386
387int
388netmap_pipe_reg_both(struct netmap_adapter *na, struct netmap_adapter *ona)
389{
390	int i, error = 0;
391	enum txrx t;
392
393	for_rx_tx(t) {
394		for (i = 0; i < nma_get_nrings(na, t); i++) {
395			struct netmap_kring *kring = NMR(na, t)[i];
396
397			if (nm_kring_pending_on(kring)) {
398				/* mark the peer ring as needed */
399				kring->pipe->nr_kflags |= NKR_NEEDRING;
400			}
401		}
402	}
403
404	/* create all missing needed rings on the other end.
405	 * Either our end, or the other, has been marked as
406	 * fake, so the allocation will not be done twice.
407	 */
408	error = netmap_mem_rings_create(ona);
409	if (error)
410		return error;
411
412	/* In case of no error we put our rings in netmap mode */
413	for_rx_tx(t) {
414		for (i = 0; i < nma_get_nrings(na, t); i++) {
415			struct netmap_kring *kring = NMR(na, t)[i];
416			if (nm_kring_pending_on(kring)) {
417				struct netmap_kring *sring, *dring;
418
419				kring->nr_mode = NKR_NETMAP_ON;
420				if ((kring->nr_kflags & NKR_FAKERING) &&
421				    (kring->pipe->nr_kflags & NKR_FAKERING)) {
422					/* this is a re-open of a pipe
423					 * end-point kept alive by the other end.
424					 * We need to leave everything as it is
425					 */
426					continue;
427				}
428
429				/* copy the buffers from the non-fake ring */
430				if (kring->nr_kflags & NKR_FAKERING) {
431					sring = kring->pipe;
432					dring = kring;
433				} else {
434					sring = kring;
435					dring = kring->pipe;
436				}
437				memcpy(dring->ring->slot,
438				       sring->ring->slot,
439				       sizeof(struct netmap_slot) *
440						sring->nkr_num_slots);
441				/* mark both rings as fake and needed,
442				 * so that buffers will not be
443				 * deleted by the standard machinery
444				 * (we will delete them by ourselves in
445				 * netmap_pipe_krings_delete)
446				 */
447				sring->nr_kflags |=
448					(NKR_FAKERING | NKR_NEEDRING);
449				dring->nr_kflags |=
450					(NKR_FAKERING | NKR_NEEDRING);
451				kring->nr_mode = NKR_NETMAP_ON;
452			}
453		}
454	}
455
456	return 0;
457}
458
459/* netmap_pipe_reg.
460 *
461 * There are two cases on registration (onoff==1)
462 *
463 * 1.a) state is
464 *
465 *        usr1 --> e1 --> e2
466 *
467 *      and we are e1. Create the needed rings of the
468 *      other end.
469 *
470 * 1.b) state is
471 *
472 *        usr1 --> e1 --> e2 <-- usr2
473 *
474 *      and we are e2. Drop the ref e1 is holding.
475 *
476 *  There are two additional cases on unregister (onoff==0)
477 *
478 *  2.a) state is
479 *
480 *         usr1 --> e1 --> e2
481 *
482 *       and we are e1. Nothing special to do, e2 will
483 *       be cleaned up by the destructor of e1.
484 *
485 *  2.b) state is
486 *
487 *         usr1 --> e1     e2 <-- usr2
488 *
489 *       and we are either e1 or e2. Add a ref from the
490 *       other end.
491 */
492static int
493netmap_pipe_reg(struct netmap_adapter *na, int onoff)
494{
495	struct netmap_pipe_adapter *pna =
496		(struct netmap_pipe_adapter *)na;
497	struct netmap_adapter *ona = &pna->peer->up;
498	int error = 0;
499
500	nm_prdis("%p: onoff %d", na, onoff);
501	if (onoff) {
502		error = netmap_pipe_reg_both(na, ona);
503		if (error) {
504			return error;
505		}
506		if (na->active_fds == 0)
507			na->na_flags |= NAF_NETMAP_ON;
508	} else {
509		if (na->active_fds == 0)
510			na->na_flags &= ~NAF_NETMAP_ON;
511		netmap_krings_mode_commit(na, onoff);
512	}
513
514	if (na->active_fds) {
515		nm_prdis("active_fds %d", na->active_fds);
516		return 0;
517	}
518
519	if (pna->peer_ref) {
520		nm_prdis("%p: case 1.a or 2.a, nothing to do", na);
521		return 0;
522	}
523	if (onoff) {
524		nm_prdis("%p: case 1.b, drop peer", na);
525		pna->peer->peer_ref = 0;
526		netmap_adapter_put(na);
527	} else {
528		nm_prdis("%p: case 2.b, grab peer", na);
529		netmap_adapter_get(na);
530		pna->peer->peer_ref = 1;
531	}
532	return error;
533}
534
535void
536netmap_pipe_krings_delete_both(struct netmap_adapter *na,
537			       struct netmap_adapter *ona)
538{
539	struct netmap_adapter *sna;
540	enum txrx t;
541	int i;
542
543	/* case 1) below */
544	nm_prdis("%p: case 1, deleting everything", na);
545	/* To avoid double-frees we zero-out all the buffers in the kernel part
546	 * of each ring. The reason is this: If the user is behaving correctly,
547	 * all buffers are found in exactly one slot in the userspace part of
548	 * some ring.  If the user is not behaving correctly, we cannot release
549	 * buffers cleanly anyway. In the latter case, the allocator will
550	 * return to a clean state only when all its users will close.
551	 */
552	sna = na;
553cleanup:
554	for_rx_tx(t) {
555		for (i = 0; i < nma_get_nrings(sna, t); i++) {
556			struct netmap_kring *kring = NMR(sna, t)[i];
557			struct netmap_ring *ring = kring->ring;
558			uint32_t j, lim = kring->nkr_num_slots - 1;
559
560			nm_prdis("%s ring %p hwtail %u hwcur %u",
561				kring->name, ring, kring->nr_hwtail, kring->nr_hwcur);
562
563			if (ring == NULL)
564				continue;
565
566			if (kring->tx == NR_RX)
567				ring->slot[kring->pipe_tail].buf_idx = 0;
568
569			for (j = nm_next(kring->pipe_tail, lim);
570			     j != kring->nr_hwcur;
571			     j = nm_next(j, lim))
572			{
573				nm_prdis("%s[%d] %u", kring->name, j, ring->slot[j].buf_idx);
574				ring->slot[j].buf_idx = 0;
575			}
576			kring->nr_kflags &= ~(NKR_FAKERING | NKR_NEEDRING);
577		}
578
579	}
580	if (sna != ona && ona->tx_rings) {
581		sna = ona;
582		goto cleanup;
583	}
584
585	netmap_mem_rings_delete(na);
586	netmap_krings_delete(na); /* also zeroes tx_rings etc. */
587
588	if (ona->tx_rings == NULL) {
589		/* already deleted, we must be on an
590		 * cleanup-after-error path */
591		return;
592	}
593	netmap_mem_rings_delete(ona);
594	netmap_krings_delete(ona);
595}
596
597/* netmap_pipe_krings_delete.
598 *
599 * There are two cases:
600 *
601 * 1) state is
602 *
603 *                usr1 --> e1 --> e2
604 *
605 *    and we are e1 (e2 is not registered, so krings_delete cannot be
606 *    called on it);
607 *
608 * 2) state is
609 *
610 *                usr1 --> e1     e2 <-- usr2
611 *
612 *    and we are either e1 or e2.
613 *
614 * In the former case we have to also delete the krings of e2;
615 * in the latter case we do nothing.
616 */
617static void
618netmap_pipe_krings_delete(struct netmap_adapter *na)
619{
620	struct netmap_pipe_adapter *pna =
621		(struct netmap_pipe_adapter *)na;
622	struct netmap_adapter *ona; /* na of the other end */
623
624	if (!pna->peer_ref) {
625		nm_prdis("%p: case 2, kept alive by peer",  na);
626		return;
627	}
628	ona = &pna->peer->up;
629	netmap_pipe_krings_delete_both(na, ona);
630}
631
632
633static void
634netmap_pipe_dtor(struct netmap_adapter *na)
635{
636	struct netmap_pipe_adapter *pna =
637		(struct netmap_pipe_adapter *)na;
638	nm_prdis("%p %p", na, pna->parent_ifp);
639	if (pna->peer_ref) {
640		nm_prdis("%p: clean up peer", na);
641		pna->peer_ref = 0;
642		netmap_adapter_put(&pna->peer->up);
643	}
644	if (pna->role == NM_PIPE_ROLE_MASTER)
645		netmap_pipe_remove(pna->parent, pna);
646	if (pna->parent_ifp)
647		if_rele(pna->parent_ifp);
648	netmap_adapter_put(pna->parent);
649	pna->parent = NULL;
650}
651
/*
 * Look up, and optionally create, the pipe endpoint named by hdr->nr_name.
 *
 * The name is parsed as 'parent{id' (master endpoint) or 'parent}id'
 * (slave endpoint), splitting at the last '{' or, if there is none, at the
 * last '}'. If the name contains no bracket the request is not for a pipe:
 * 0 is returned and *na is not written.
 *
 * On success 0 is returned and *na points to the requested endpoint, on
 * which an additional reference has been taken. If the pipe does not exist
 * and create is non-zero, both endpoints are allocated and the requested
 * one holds a reference to its peer.
 *
 * Errors generated here: EINVAL (missing parent name or pipe id, or
 * nr_mode other than NR_REG_ALL_NIC/NR_REG_ONE_NIC), EBUSY (parent owned
 * by the kernel), ENODEV (pipe not found and create is zero), ENOMEM
 * (allocation failure). Errors from netmap_get_na(),
 * netmap_attach_common() and netmap_pipe_add() are passed through.
 *
 * NMG_LOCK is dropped and re-acquired around netmap_vi_create(), so
 * presumably the caller holds it on entry -- TODO confirm.
 */
int
netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
		struct netmap_mem_d *nmd, int create)
{
	struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
	struct netmap_adapter *pna; /* parent adapter */
	struct netmap_pipe_adapter *mna, *sna, *reqna;
	struct ifnet *ifp = NULL;
	const char *pipe_id = NULL;
	int role = 0;
	int error, retries = 0;
	char *cbra;

	/* Try to parse the pipe syntax 'xx{yy' or 'xx}yy'. */
	cbra = strrchr(hdr->nr_name, '{');
	if (cbra != NULL) {
		role = NM_PIPE_ROLE_MASTER;
	} else {
		cbra = strrchr(hdr->nr_name, '}');
		if (cbra != NULL) {
			role = NM_PIPE_ROLE_SLAVE;
		} else {
			nm_prdis("not a pipe");
			return 0;
		}
	}
	pipe_id = cbra + 1;
	if (*pipe_id == '\0' || cbra == hdr->nr_name) {
		/* Bracket is the last character, so pipe name is missing;
		 * or bracket is the first character, so base port name
		 * is missing. */
		return EINVAL;
	}

	if (req->nr_mode != NR_REG_ALL_NIC && req->nr_mode != NR_REG_ONE_NIC) {
		/* We only accept modes involving hardware rings. */
		return EINVAL;
	}

	/* first, try to find the parent adapter */
	for (;;) {
		char nr_name_orig[NETMAP_REQ_IFNAMSIZ];
		int create_error;

		/* Temporarily remove the pipe suffix. */
		strlcpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig));
		*cbra = '\0';
		error = netmap_get_na(hdr, &pna, &ifp, nmd, create);
		/* Restore the pipe suffix. */
		strlcpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
		if (!error)
			break;
		/* only an ENXIO failure is retried, and only once */
		if (error != ENXIO || retries++) {
			nm_prdis("parent lookup failed: %d", error);
			return error;
		}
		nm_prdis("try to create a persistent vale port");
		/* create a persistent vale port and try again */
		*cbra = '\0';
		NMG_UNLOCK();
		create_error = netmap_vi_create(hdr, 1 /* autodelete */);
		NMG_LOCK();
		strlcpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
		if (create_error && create_error != EEXIST) {
			if (create_error != EOPNOTSUPP) {
				nm_prerr("failed to create a persistent vale port: %d",
				    create_error);
			}
			/* the error of the failed lookup (ENXIO at this
			 * point) is returned, not create_error
			 */
			return error;
		}
	}

	if (NETMAP_OWNED_BY_KERN(pna)) {
		nm_prdis("parent busy");
		error = EBUSY;
		goto put_out;
	}

	/* next, lookup the pipe id in the parent list */
	reqna = NULL;
	mna = netmap_pipe_find(pna, pipe_id);
	if (mna) {
		if (mna->role == role) {
			nm_prdis("found %s directly at %d", pipe_id, mna->parent_slot);
			reqna = mna;
		} else {
			nm_prdis("found %s indirectly at %d", pipe_id, mna->parent_slot);
			reqna = mna->peer;
		}
		/* the pipe we have found already holds a ref to the parent,
		 * so we need to drop the one we got from netmap_get_na()
		 */
		netmap_unget_na(pna, ifp);
		goto found;
	}
	nm_prdis("pipe %s not found, create %d", pipe_id, create);
	if (!create) {
		error = ENODEV;
		goto put_out;
	}
	/* we create both master and slave.
	 * The endpoint we were asked for holds a reference to
	 * the other one.
	 */
	mna = nm_os_malloc(sizeof(*mna));
	if (mna == NULL) {
		error = ENOMEM;
		goto put_out;
	}
	snprintf(mna->up.name, sizeof(mna->up.name), "%s{%s", pna->name, pipe_id);

	mna->role = NM_PIPE_ROLE_MASTER;
	mna->parent = pna;
	mna->parent_ifp = ifp;

	mna->up.nm_txsync = netmap_pipe_txsync;
	mna->up.nm_rxsync = netmap_pipe_rxsync;
	mna->up.nm_register = netmap_pipe_reg;
	mna->up.nm_dtor = netmap_pipe_dtor;
	mna->up.nm_krings_create = netmap_pipe_krings_create;
	mna->up.nm_krings_delete = netmap_pipe_krings_delete;
	/* the pipe uses the memory allocator and lookup table of the parent */
	mna->up.nm_mem = netmap_mem_get(pna->nm_mem);
	mna->up.na_flags |= NAF_MEM_OWNER;
	mna->up.na_lut = pna->na_lut;

	/* ring geometry: taken from the request and then passed through
	 * nm_bound_var() with NM_PIPE_MAXRINGS/NM_PIPE_MAXSLOTS
	 */
	mna->up.num_tx_rings = req->nr_tx_rings;
	nm_bound_var(&mna->up.num_tx_rings, 1,
			1, NM_PIPE_MAXRINGS, NULL);
	mna->up.num_rx_rings = req->nr_rx_rings;
	nm_bound_var(&mna->up.num_rx_rings, 1,
			1, NM_PIPE_MAXRINGS, NULL);
	mna->up.num_tx_desc = req->nr_tx_slots;
	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
			1, NM_PIPE_MAXSLOTS, NULL);
	mna->up.num_rx_desc = req->nr_rx_slots;
	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
			1, NM_PIPE_MAXSLOTS, NULL);
	error = netmap_attach_common(&mna->up);
	if (error)
		goto free_mna;
	/* register the master with the parent */
	error = netmap_pipe_add(pna, mna);
	if (error)
		goto free_mna;

	/* create the slave */
	/* (sna and mna have the same type, so sizeof(*mna) is the size
	 * needed here)
	 */
	sna = nm_os_malloc(sizeof(*mna));
	if (sna == NULL) {
		error = ENOMEM;
		goto unregister_mna;
	}
	/* most fields are the same, copy from master and then fix */
	*sna = *mna;
	sna->up.nm_mem = netmap_mem_get(mna->up.nm_mem);
	/* swap the number of tx/rx rings and slots */
	sna->up.num_tx_rings = mna->up.num_rx_rings;
	sna->up.num_tx_desc  = mna->up.num_rx_desc;
	sna->up.num_rx_rings = mna->up.num_tx_rings;
	sna->up.num_rx_desc  = mna->up.num_tx_desc;
	snprintf(sna->up.name, sizeof(sna->up.name), "%s}%s", pna->name, pipe_id);
	sna->role = NM_PIPE_ROLE_SLAVE;
	error = netmap_attach_common(&sna->up);
	if (error)
		goto free_sna;

	/* join the two endpoints */
	mna->peer = sna;
	sna->peer = mna;

	/* we already have a reference to the parent, but we
	 * need another one for the other endpoint we created
	 */
	netmap_adapter_get(pna);
	/* likewise for the ifp, if any */
	if (ifp)
		if_ref(ifp);

	/* the requested endpoint takes a reference to the other one */
	if (role == NM_PIPE_ROLE_MASTER) {
		reqna = mna;
		mna->peer_ref = 1;
		netmap_adapter_get(&sna->up);
	} else {
		reqna = sna;
		sna->peer_ref = 1;
		netmap_adapter_get(&mna->up);
	}
	nm_prdis("created master %p and slave %p", mna, sna);
found:

	nm_prdis("pipe %s %s at %p", pipe_id,
		(reqna->role == NM_PIPE_ROLE_MASTER ? "master" : "slave"), reqna);
	*na = &reqna->up;
	netmap_adapter_get(*na);

	/* keep the reference to the parent.
	 * It will be released by the req destructor
	 */

	return 0;

	/* error unwinding: each label undoes one step and falls through
	 * to the labels below it
	 */
free_sna:
	nm_os_free(sna);
unregister_mna:
	netmap_pipe_remove(pna, mna);
free_mna:
	nm_os_free(mna);
put_out:
	netmap_unget_na(pna, ifp);
	return error;
}
862
863
864#endif /* WITH_PIPES */
865