if_epair.c revision 195892
1/*-
2 * Copyright (c) 2008 The FreeBSD Foundation
3 * Copyright (c) 2009 Bjoern A. Zeeb <bz@FreeBSD.org>
4 * All rights reserved.
5 *
6 * This software was developed by CK Software GmbH under sponsorship
7 * from the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31/*
32 * A pair of virtual back-to-back connected ethernet like interfaces
33 * (``two interfaces with a virtual cross-over cable'').
34 *
35 * This is mostly intended to be used to provide connectivity between
36 * different virtual network stack instances.
37 */
38/*
39 * Things to re-think once we have more experience:
40 * - ifp->if_reassign function once we can test with vimage. Depending on
41 *   how if_vmove() is going to be improved.
42 * - Real random etheraddrs that are checked to be uniquish; we would need
43 *   to re-do them in case we move the interface between network stacks
44 *   in a private if_reassign function.
45 *   In case we bridge to a real interface/network or between independent
46 *   epairs on multiple stacks/machines, we may need this.
47 *   For now let the user handle that case.
48 */
49
50#include <sys/cdefs.h>
51__FBSDID("$FreeBSD: head/sys/net/if_epair.c 195892 2009-07-26 12:20:07Z bz $");
52
53#include <sys/param.h>
54#include <sys/kernel.h>
55#include <sys/mbuf.h>
56#include <sys/module.h>
57#include <sys/refcount.h>
58#include <sys/queue.h>
59#include <sys/smp.h>
60#include <sys/socket.h>
61#include <sys/sockio.h>
62#include <sys/sysctl.h>
63#include <sys/types.h>
64#include <sys/vimage.h>
65
66#include <net/bpf.h>
67#include <net/ethernet.h>
68#include <net/if.h>
69#include <net/if_clone.h>
70#include <net/if_var.h>
71#include <net/if_types.h>
72#include <net/netisr.h>
73
74#define	EPAIRNAME	"epair"
75
76SYSCTL_DECL(_net_link);
77SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl");
78
79#ifdef EPAIR_DEBUG
80static int epair_debug = 0;
81SYSCTL_XINT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW,
82    &epair_debug, 0, "if_epair(4) debugging.");
83#define	DPRINTF(fmt, arg...)						\
84	if (epair_debug)						\
85		printf("[%s:%d] " fmt, __func__, __LINE__, ##arg)
86#else
87#define	DPRINTF(fmt, arg...)
88#endif
89
90static void epair_nh_sintr(struct mbuf *);
91static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *);
92static void epair_nh_drainedcpu(u_int);
93
94static void epair_start_locked(struct ifnet *);
95
96static int epair_clone_match(struct if_clone *, const char *);
97static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t);
98static int epair_clone_destroy(struct if_clone *, struct ifnet *);
99
100/* Netisr realted definitions and sysctl. */
101static struct netisr_handler epair_nh = {
102	.nh_name	= EPAIRNAME,
103	.nh_proto	= NETISR_EPAIR,
104	.nh_policy	= NETISR_POLICY_CPU,
105	.nh_handler	= epair_nh_sintr,
106	.nh_m2cpuid	= epair_nh_m2cpuid,
107	.nh_drainedcpu	= epair_nh_drainedcpu,
108};
109
110static int
111sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
112{
113	int error, qlimit;
114
115	netisr_getqlimit(&epair_nh, &qlimit);
116	error = sysctl_handle_int(oidp, &qlimit, 0, req);
117	if (error || !req->newptr)
118		return (error);
119	if (qlimit < 1)
120		return (EINVAL);
121	return (netisr_setqlimit(&epair_nh, qlimit));
122}
123SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
124    0, 0, sysctl_epair_netisr_maxqlen, "I",
125    "Maximum if_epair(4) netisr \"hw\" queue length");
126
127struct epair_softc {
128	struct ifnet	*ifp;		/* This ifp. */
129	struct ifnet	*oifp;		/* other ifp of pair. */
130	u_int		refcount;	/* # of mbufs in flight. */
131	u_int		cpuid;		/* CPU ID assigned upon creation. */
132	void		(*if_qflush)(struct ifnet *);
133					/* Original if_qflush routine. */
134};
135
136/*
137 * Per-CPU list of ifps with data in the ifq that needs to be flushed
138 * to the netisr ``hw'' queue before we allow any further direct queuing
139 * to the ``hw'' queue.
140 */
141struct epair_ifp_drain {
142	STAILQ_ENTRY(epair_ifp_drain)	ifp_next;
143	struct ifnet			*ifp;
144};
145STAILQ_HEAD(eid_list, epair_ifp_drain);
146
/*
 * Wrappers around the per-CPU mutex stored in struct epair_dpcpu.
 * Each takes a pointer to the per-CPU area.
 */
#define	EPAIR_LOCK_INIT(dpcpu)						\
	mtx_init(&(dpcpu)->if_epair_mtx, "if_epair", NULL, MTX_DEF)
#define	EPAIR_LOCK_DESTROY(dpcpu)					\
	mtx_destroy(&(dpcpu)->if_epair_mtx)
#define	EPAIR_LOCK_ASSERT(dpcpu)					\
	mtx_assert(&(dpcpu)->if_epair_mtx, MA_OWNED)
#define	EPAIR_LOCK(dpcpu)						\
	mtx_lock(&(dpcpu)->if_epair_mtx)
#define	EPAIR_UNLOCK(dpcpu)						\
	mtx_unlock(&(dpcpu)->if_epair_mtx)
154
/*
 * Reference counting of in-flight mbufs.  The counters are only maintained
 * and checked on INVARIANTS kernels; otherwise all four macros expand to
 * nothing.
 */
#ifndef INVARIANTS
#define	EPAIR_REFCOUNT_INIT(r, v)
#define	EPAIR_REFCOUNT_AQUIRE(r)
#define	EPAIR_REFCOUNT_RELEASE(r)
#define	EPAIR_REFCOUNT_ASSERT(a, p)
#else
#define	EPAIR_REFCOUNT_INIT(r, v)	refcount_init((r), (v))
#define	EPAIR_REFCOUNT_AQUIRE(r)	refcount_acquire((r))
#define	EPAIR_REFCOUNT_RELEASE(r)	refcount_release((r))
#define	EPAIR_REFCOUNT_ASSERT(a, p)	KASSERT(a, p)
#endif
166
167static MALLOC_DEFINE(M_EPAIR, EPAIRNAME,
168    "Pair of virtual cross-over connected Ethernet-like interfaces");
169
170static struct if_clone epair_cloner = IFC_CLONE_INITIALIZER(
171    EPAIRNAME, NULL, IF_MAXUNIT,
172    NULL, epair_clone_match, epair_clone_create, epair_clone_destroy);
173
174/*
175 * DPCPU area and functions.
176 */
177struct epair_dpcpu {
178	struct mtx	if_epair_mtx;		/* Per-CPU locking. */
179	int		epair_drv_flags;	/* Per-CPU ``hw'' drv flags. */
180	struct eid_list	epair_ifp_drain_list;	/* Per-CPU list of ifps with
181						 * data in the ifq. */
182};
183DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu);
184
185static void
186epair_dpcpu_init(void)
187{
188	struct epair_dpcpu *epair_dpcpu;
189	struct eid_list *s;
190	u_int cpuid;
191
192	for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
193		if (CPU_ABSENT(cpuid))
194			continue;
195
196		epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
197
198		/* Initialize per-cpu lock. */
199		EPAIR_LOCK_INIT(epair_dpcpu);
200
201		/* Driver flags are per-cpu as are our netisr "hw" queues. */
202		epair_dpcpu->epair_drv_flags = 0;
203
204		/*
205		 * Initialize per-cpu drain list.
206		 * Manually do what STAILQ_HEAD_INITIALIZER would do.
207		 */
208		s = &epair_dpcpu->epair_ifp_drain_list;
209		s->stqh_first = NULL;
210		s->stqh_last = &s->stqh_first;
211	}
212}
213
214static void
215epair_dpcpu_detach(void)
216{
217	struct epair_dpcpu *epair_dpcpu;
218	u_int cpuid;
219
220	for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
221		if (CPU_ABSENT(cpuid))
222			continue;
223
224		epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
225
226		/* Destroy per-cpu lock. */
227		EPAIR_LOCK_DESTROY(epair_dpcpu);
228	}
229}
230
231/*
232 * Helper functions.
233 */
234static u_int
235cpuid_from_ifp(struct ifnet *ifp)
236{
237	struct epair_softc *sc;
238
239	if (ifp == NULL)
240		return (0);
241	sc = ifp->if_softc;
242
243	return (sc->cpuid);
244}
245
246/*
247 * Netisr handler functions.
248 */
249static void
250epair_nh_sintr(struct mbuf *m)
251{
252	struct ifnet *ifp;
253	struct epair_softc *sc;
254
255	ifp = m->m_pkthdr.rcvif;
256	(*ifp->if_input)(ifp, m);
257	sc = ifp->if_softc;
258	EPAIR_REFCOUNT_RELEASE(&sc->refcount);
259	DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount);
260}
261
262static struct mbuf *
263epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid)
264{
265
266	*cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif);
267
268	return (m);
269}
270
271static void
272epair_nh_drainedcpu(u_int cpuid)
273{
274	struct epair_dpcpu *epair_dpcpu;
275	struct epair_ifp_drain *elm, *tvar;
276	struct ifnet *ifp;
277
278	epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
279	EPAIR_LOCK(epair_dpcpu);
280	/*
281	 * Assume our "hw" queue and possibly ifq will be emptied
282	 * again. In case we will overflow the "hw" queue while
283	 * draining, epair_start_locked will set IFF_DRV_OACTIVE
284	 * again and we will stop and return.
285	 */
286	STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list,
287	    ifp_next, tvar) {
288		ifp = elm->ifp;
289		epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE;
290		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
291		epair_start_locked(ifp);
292
293		IFQ_LOCK(&ifp->if_snd);
294		if (IFQ_IS_EMPTY(&ifp->if_snd)) {
295			STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list,
296			    elm, epair_ifp_drain, ifp_next);
297			free(elm, M_EPAIR);
298		}
299		IFQ_UNLOCK(&ifp->if_snd);
300
301		if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) {
302			/* Our "hw"q overflew again. */
303			epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE
304			DPRINTF("hw queue length overflow at %u\n",
305			    epair_nh.nh_qlimit);
306			break;
307		}
308	}
309	EPAIR_UNLOCK(epair_dpcpu);
310}
311
312/*
313 * Network interface (`if') related functions.
314 */
315static int
316epair_add_ifp_for_draining(struct ifnet *ifp)
317{
318	struct epair_dpcpu *epair_dpcpu;
319	struct epair_softc *sc = sc = ifp->if_softc;
320	struct epair_ifp_drain *elm = NULL;
321
322	epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu);
323	STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next)
324		if (elm->ifp == ifp)
325			break;
326	/* If the ipf is there already, return success. */
327	if (elm != NULL)
328		return (0);
329
330	elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO);
331	if (elm == NULL)
332		return (ENOMEM);
333
334	elm->ifp = ifp;
335	STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next);
336
337	return (0);
338}
339
340static void
341epair_start_locked(struct ifnet *ifp)
342{
343	struct epair_dpcpu *epair_dpcpu;
344	struct mbuf *m;
345	struct epair_softc *sc;
346	struct ifnet *oifp;
347	int error;
348
349	DPRINTF("ifp=%p\n", ifp);
350	sc = ifp->if_softc;
351	epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu);
352	EPAIR_LOCK_ASSERT(epair_dpcpu);
353
354	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
355		return;
356	if ((ifp->if_flags & IFF_UP) == 0)
357		return;
358
359	/*
360	 * We get patckets here from ether_output via if_handoff()
361	 * and ned to put them into the input queue of the oifp
362	 * and call oifp->if_input() via netisr/epair_sintr().
363	 */
364	oifp = sc->oifp;
365	sc = oifp->if_softc;
366	for (;;) {
367		IFQ_DEQUEUE(&ifp->if_snd, m);
368		if (m == NULL)
369			break;
370		BPF_MTAP(ifp, m);
371
372		/*
373		 * In case the outgoing interface is not usable,
374		 * drop the packet.
375		 */
376		if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
377		    (oifp->if_flags & IFF_UP) ==0) {
378			ifp->if_oerrors++;
379			m_freem(m);
380			continue;
381		}
382		DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname);
383
384		/*
385		 * Add a reference so the interface cannot go while the
386		 * packet is in transit as we rely on rcvif to stay valid.
387		 */
388		EPAIR_REFCOUNT_AQUIRE(&sc->refcount);
389		m->m_pkthdr.rcvif = oifp;
390		CURVNET_SET_QUIET(oifp->if_vnet);
391		error = netisr_queue(NETISR_EPAIR, m);
392		CURVNET_RESTORE();
393		if (!error) {
394			ifp->if_opackets++;
395			/* Someone else received the packet. */
396			oifp->if_ipackets++;
397		} else {
398			epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE;
399			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
400			if (epair_add_ifp_for_draining(ifp)) {
401				ifp->if_oerrors++;
402				m_freem(m);
403			}
404			EPAIR_REFCOUNT_RELEASE(&sc->refcount);
405		}
406	}
407}
408
409static void
410epair_start(struct ifnet *ifp)
411{
412	struct epair_dpcpu *epair_dpcpu;
413
414	epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu);
415	EPAIR_LOCK(epair_dpcpu);
416	epair_start_locked(ifp);
417	EPAIR_UNLOCK(epair_dpcpu);
418}
419
420static int
421epair_transmit_locked(struct ifnet *ifp, struct mbuf *m)
422{
423	struct epair_dpcpu *epair_dpcpu;
424	struct epair_softc *sc;
425	struct ifnet *oifp;
426	int error, len;
427	short mflags;
428
429	DPRINTF("ifp=%p m=%p\n", ifp, m);
430	sc = ifp->if_softc;
431	epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu);
432	EPAIR_LOCK_ASSERT(epair_dpcpu);
433
434	if (m == NULL)
435		return (0);
436
437	/*
438	 * We are not going to use the interface en/dequeue mechanism
439	 * on the TX side. We are called from ether_output_frame()
440	 * and will put the packet into the incoming queue of the
441	 * other interface of our pair via the netsir.
442	 */
443	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
444		m_freem(m);
445		return (ENXIO);
446	}
447	if ((ifp->if_flags & IFF_UP) == 0) {
448		m_freem(m);
449		return (ENETDOWN);
450	}
451
452	BPF_MTAP(ifp, m);
453
454	/*
455	 * In case the outgoing interface is not usable,
456	 * drop the packet.
457	 */
458	oifp = sc->oifp;
459	if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
460	    (oifp->if_flags & IFF_UP) ==0) {
461		ifp->if_oerrors++;
462		m_freem(m);
463		return (0);
464	}
465	len = m->m_pkthdr.len;
466	mflags = m->m_flags;
467	DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname);
468
469#ifdef ALTQ
470	/* Support ALTQ via the clasic if_start() path. */
471	IF_LOCK(&ifp->if_snd);
472	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
473		ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error);
474		if (error)
475			ifp->if_snd.ifq_drops++;
476		IF_UNLOCK(&ifp->if_snd);
477		if (!error) {
478			ifp->if_obytes += len;
479			if (mflags & (M_BCAST|M_MCAST))
480				ifp->if_omcasts++;
481
482			if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0)
483				epair_start_locked(ifp);
484			else
485				(void)epair_add_ifp_for_draining(ifp);
486		}
487		return (error);
488	}
489	IF_UNLOCK(&ifp->if_snd);
490#endif
491
492	if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) {
493		/*
494		 * Our hardware queue is full, try to fall back
495		 * queuing to the ifq but do not call ifp->if_start.
496		 * Either we are lucky or the packet is gone.
497		 */
498		IFQ_ENQUEUE(&ifp->if_snd, m, error);
499		if (!error)
500			(void)epair_add_ifp_for_draining(ifp);
501		return (error);
502	}
503	sc = oifp->if_softc;
504	/*
505	 * Add a reference so the interface cannot go while the
506	 * packet is in transit as we rely on rcvif to stay valid.
507	 */
508	EPAIR_REFCOUNT_AQUIRE(&sc->refcount);
509	m->m_pkthdr.rcvif = oifp;
510	CURVNET_SET_QUIET(oifp->if_vnet);
511	error = netisr_queue(NETISR_EPAIR, m);
512	CURVNET_RESTORE();
513	if (!error) {
514		ifp->if_opackets++;
515		/*
516		 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics,
517		 * but as we bypass all this we have to duplicate
518		 * the logic another time.
519		 */
520		ifp->if_obytes += len;
521		if (mflags & (M_BCAST|M_MCAST))
522			ifp->if_omcasts++;
523		/* Someone else received the packet. */
524		oifp->if_ipackets++;
525	} else {
526		/* The packet was freed already. */
527		EPAIR_REFCOUNT_RELEASE(&sc->refcount);
528		epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE;
529		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
530	}
531
532	return (error);
533}
534
535static int
536epair_transmit(struct ifnet *ifp, struct mbuf *m)
537{
538	struct epair_dpcpu *epair_dpcpu;
539	int error;
540
541	epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu);
542	EPAIR_LOCK(epair_dpcpu);
543	error = epair_transmit_locked(ifp, m);
544	EPAIR_UNLOCK(epair_dpcpu);
545	return (error);
546}
547
548static void
549epair_qflush(struct ifnet *ifp)
550{
551	struct epair_dpcpu *epair_dpcpu;
552	struct epair_softc *sc;
553	struct ifaltq *ifq;
554
555	sc = ifp->if_softc;
556	epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu);
557	EPAIR_LOCK(epair_dpcpu);
558	ifq = &ifp->if_snd;
559	DPRINTF("ifp=%p sc refcnt=%u ifq_len=%u\n",
560	    ifp, sc->refcount, ifq->ifq_len);
561	/*
562	 * Instead of calling EPAIR_REFCOUNT_RELEASE(&sc->refcount);
563	 * n times, just subtract for the cleanup.
564	 */
565	sc->refcount -= ifq->ifq_len;
566	EPAIR_UNLOCK(epair_dpcpu);
567	if (sc->if_qflush)
568		sc->if_qflush(ifp);
569}
570
571static int
572epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
573{
574	struct ifreq *ifr;
575	int error;
576
577	ifr = (struct ifreq *)data;
578	switch (cmd) {
579	case SIOCSIFFLAGS:
580	case SIOCADDMULTI:
581	case SIOCDELMULTI:
582		error = 0;
583		break;
584
585	case SIOCSIFMTU:
586		/* We basically allow all kinds of MTUs. */
587		ifp->if_mtu = ifr->ifr_mtu;
588		error = 0;
589		break;
590
591	default:
592		/* Let the common ethernet handler process this. */
593		error = ether_ioctl(ifp, cmd, data);
594		break;
595	}
596
597	return (error);
598}
599
600static void
601epair_init(void *dummy __unused)
602{
603}
604
605
606/*
607 * Interface cloning functions.
608 * We use our private ones so that we can create/destroy our secondary
609 * device along with the primary one.
610 */
611static int
612epair_clone_match(struct if_clone *ifc, const char *name)
613{
614	const char *cp;
615
616	DPRINTF("name='%s'\n", name);
617
618	/*
619	 * Our base name is epair.
620	 * Our interfaces will be named epair<n>[ab].
621	 * So accept anything of the following list:
622	 * - epair
623	 * - epair<n>
624	 * but not the epair<n>[ab] versions.
625	 */
626	if (strncmp(EPAIRNAME, name, sizeof(EPAIRNAME)-1) != 0)
627		return (0);
628
629	for (cp = name + sizeof(EPAIRNAME) - 1; *cp != '\0'; cp++) {
630		if (*cp < '0' || *cp > '9')
631			return (0);
632	}
633
634	return (1);
635}
636
637static int
638epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
639{
640	struct epair_softc *sca, *scb;
641	struct ifnet *ifp;
642	char *dp;
643	int error, unit, wildcard;
644	uint8_t eaddr[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */
645
646	/*
647	 * We are abusing params to create our second interface.
648	 * Actually we already created it and called if_clone_createif()
649	 * for it to do the official insertion procedure the moment we knew
650	 * it cannot fail anymore. So just do attach it here.
651	 */
652	if (params) {
653		scb = (struct epair_softc *)params;
654		ifp = scb->ifp;
655		/* Assign a hopefully unique, locally administered etheraddr. */
656		eaddr[0] = 0x02;
657		eaddr[3] = (ifp->if_index >> 8) & 0xff;
658		eaddr[4] = ifp->if_index & 0xff;
659		eaddr[5] = 0x0b;
660		ether_ifattach(ifp, eaddr);
661		/* Correctly set the name for the cloner list. */
662		strlcpy(name, scb->ifp->if_xname, len);
663		return (0);
664	}
665
666	/* Try to see if a special unit was requested. */
667	error = ifc_name2unit(name, &unit);
668	if (error != 0)
669		return (error);
670	wildcard = (unit < 0);
671
672	error = ifc_alloc_unit(ifc, &unit);
673	if (error != 0)
674		return (error);
675
676	/*
677	 * If no unit had been given, we need to adjust the ifName.
678	 * Also make sure there is space for our extra [ab] suffix.
679	 */
680	for (dp = name; *dp != '\0'; dp++);
681	if (wildcard) {
682		error = snprintf(dp, len - (dp - name), "%d", unit);
683		if (error > len - (dp - name) - 1) {
684			/* ifName too long. */
685			ifc_free_unit(ifc, unit);
686			return (ENOSPC);
687		}
688		dp += error;
689	}
690	if (len - (dp - name) - 1 < 1) {
691		/* No space left for our [ab] suffix. */
692		ifc_free_unit(ifc, unit);
693		return (ENOSPC);
694	}
695	*dp = 'a';
696	/* Must not change dp so we can replace 'a' by 'b' later. */
697	*(dp+1) = '\0';
698
699	/* Allocate memory for both [ab] interfaces */
700	sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
701	EPAIR_REFCOUNT_INIT(&sca->refcount, 1);
702	sca->ifp = if_alloc(IFT_ETHER);
703	if (sca->ifp == NULL) {
704		free(sca, M_EPAIR);
705		ifc_free_unit(ifc, unit);
706		return (ENOSPC);
707	}
708
709	scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
710	EPAIR_REFCOUNT_INIT(&scb->refcount, 1);
711	scb->ifp = if_alloc(IFT_ETHER);
712	if (scb->ifp == NULL) {
713		free(scb, M_EPAIR);
714		if_free(sca->ifp);
715		free(sca, M_EPAIR);
716		ifc_free_unit(ifc, unit);
717		return (ENOSPC);
718	}
719
720	/*
721	 * Cross-reference the interfaces so we will be able to free both.
722	 */
723	sca->oifp = scb->ifp;
724	scb->oifp = sca->ifp;
725
726	/*
727	 * Calculate the cpuid for netisr queueing based on the
728	 * ifIndex of the interfaces. As long as we cannot configure
729	 * this or use cpuset information easily we cannot guarantee
730	 * cache locality but we can at least allow parallelism.
731	 */
732	sca->cpuid =
733	    netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount());
734	scb->cpuid =
735	    netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount());
736
737	/* Finish initialization of interface <n>a. */
738	ifp = sca->ifp;
739	ifp->if_softc = sca;
740	strlcpy(ifp->if_xname, name, IFNAMSIZ);
741	ifp->if_dname = ifc->ifc_name;
742	ifp->if_dunit = unit;
743	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
744	ifp->if_start = epair_start;
745	ifp->if_ioctl = epair_ioctl;
746	ifp->if_init  = epair_init;
747	ifp->if_snd.ifq_maxlen = ifqmaxlen;
748	/* Assign a hopefully unique, locally administered etheraddr. */
749	eaddr[0] = 0x02;
750	eaddr[3] = (ifp->if_index >> 8) & 0xff;
751	eaddr[4] = ifp->if_index & 0xff;
752	eaddr[5] = 0x0a;
753	ether_ifattach(ifp, eaddr);
754	sca->if_qflush = ifp->if_qflush;
755	ifp->if_qflush = epair_qflush;
756	ifp->if_transmit = epair_transmit;
757	ifp->if_baudrate = IF_Gbps(10UL);	/* arbitrary maximum */
758
759	/* Swap the name and finish initialization of interface <n>b. */
760	*dp = 'b';
761
762	ifp = scb->ifp;
763	ifp->if_softc = scb;
764	strlcpy(ifp->if_xname, name, IFNAMSIZ);
765	ifp->if_dname = ifc->ifc_name;
766	ifp->if_dunit = unit;
767	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
768	ifp->if_start = epair_start;
769	ifp->if_ioctl = epair_ioctl;
770	ifp->if_init  = epair_init;
771	ifp->if_snd.ifq_maxlen = ifqmaxlen;
772	/* We need to play some tricks here for the second interface. */
773	strlcpy(name, EPAIRNAME, len);
774	error = if_clone_create(name, len, (caddr_t)scb);
775	if (error)
776		panic("%s: if_clone_createif() for our 2nd iface failed: %d",
777		    __func__, error);
778	scb->if_qflush = ifp->if_qflush;
779	ifp->if_qflush = epair_qflush;
780	ifp->if_transmit = epair_transmit;
781	ifp->if_baudrate = IF_Gbps(10UL);	/* arbitrary maximum */
782
783	/*
784	 * Restore name to <n>a as the ifp for this will go into the
785	 * cloner list for the initial call.
786	 */
787	strlcpy(name, sca->ifp->if_xname, len);
788	DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb);
789
790	/* Tell the world, that we are ready to rock. */
791	sca->ifp->if_drv_flags |= IFF_DRV_RUNNING;
792	scb->ifp->if_drv_flags |= IFF_DRV_RUNNING;
793
794	return (0);
795}
796
797static int
798epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
799{
800	struct ifnet *oifp;
801	struct epair_softc *sca, *scb;
802	int unit, error;
803
804	DPRINTF("ifp=%p\n", ifp);
805
806	/*
807	 * In case we called into if_clone_destroyif() ourselves
808	 * again to remove the second interface, the softc will be
809	 * NULL. In that case so not do anything but return success.
810	 */
811	if (ifp->if_softc == NULL)
812		return (0);
813
814	unit = ifp->if_dunit;
815	sca = ifp->if_softc;
816	oifp = sca->oifp;
817	scb = oifp->if_softc;
818
819	DPRINTF("ifp=%p oifp=%p\n", ifp, oifp);
820	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
821	oifp->if_drv_flags &= ~IFF_DRV_RUNNING;
822	ether_ifdetach(oifp);
823	ether_ifdetach(ifp);
824	/*
825	 * Wait for all packets to be dispatched to if_input.
826	 * The numbers can only go down as the interfaces are
827	 * detached so there is no need to use atomics.
828	 */
829	DPRINTF("sca refcnt=%u scb refcnt=%u\n", sca->refcount, scb->refcount);
830	EPAIR_REFCOUNT_ASSERT(sca->refcount == 1 && scb->refcount == 1,
831	    ("%s: sca->refcount!=1: %d || scb->refcount!=1: %d",
832	    __func__, sca->refcount, scb->refcount));
833
834	/*
835	 * Get rid of our second half.
836	 */
837	oifp->if_softc = NULL;
838	error = if_clone_destroyif(ifc, oifp);
839	if (error)
840		panic("%s: if_clone_destroyif() for our 2nd iface failed: %d",
841		    __func__, error);
842
843	/*
844	 * Finish cleaning up. Free them and release the unit.
845	 * As the other of the two interfaces my reside in a different vnet,
846	 * we need to switch before freeing them.
847	 */
848	CURVNET_SET_QUIET(oifp->if_vnet);
849	if_free_type(oifp, IFT_ETHER);
850	CURVNET_RESTORE();
851	if_free_type(ifp, IFT_ETHER);
852	free(scb, M_EPAIR);
853	free(sca, M_EPAIR);
854	ifc_free_unit(ifc, unit);
855
856	return (0);
857}
858
859static int
860epair_modevent(module_t mod, int type, void *data)
861{
862	int qlimit;
863
864	switch (type) {
865	case MOD_LOAD:
866		/* For now limit us to one global mutex and one inq. */
867		epair_dpcpu_init();
868		epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */
869		if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit))
870		    epair_nh.nh_qlimit = qlimit;
871		netisr_register(&epair_nh);
872		if_clone_attach(&epair_cloner);
873		if (bootverbose)
874			printf("%s initialized.\n", EPAIRNAME);
875		break;
876	case MOD_UNLOAD:
877		if_clone_detach(&epair_cloner);
878		netisr_unregister(&epair_nh);
879		epair_dpcpu_detach();
880		if (bootverbose)
881			printf("%s unloaded.\n", EPAIRNAME);
882		break;
883	default:
884		return (EOPNOTSUPP);
885	}
886	return (0);
887}
888
889static moduledata_t epair_mod = {
890	"if_epair",
891	epair_modevent,
892	0
893};
894
895DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
896MODULE_VERSION(if_epair, 1);
897