if_epair.c revision 194927
1194927Sbz/*-
2194927Sbz * Copyright (c) 2008 The FreeBSD Foundation
3194927Sbz * All rights reserved.
4194927Sbz *
5194927Sbz * This software was developed by CK Software GmbH under sponsorship
6194927Sbz * from the FreeBSD Foundation.
7194927Sbz *
8194927Sbz * Redistribution and use in source and binary forms, with or without
9194927Sbz * modification, are permitted provided that the following conditions
10194927Sbz * are met:
11194927Sbz * 1. Redistributions of source code must retain the above copyright
12194927Sbz * notice, this list of conditions and the following disclaimer.
13194927Sbz * 2. Redistributions in binary form must reproduce the above copyright
14194927Sbz * notice, this list of conditions and the following disclaimer in the
15194927Sbz * documentation and/or other materials provided with the distribution.
16194927Sbz *
17194927Sbz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18194927Sbz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19194927Sbz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20194927Sbz * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21194927Sbz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22194927Sbz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23194927Sbz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24194927Sbz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25194927Sbz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26194927Sbz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27194927Sbz * SUCH DAMAGE.
28194927Sbz */
29194927Sbz
30194927Sbz/*
31194927Sbz * A pair of virtual ethernet interfaces directly connected with
32194927Sbz * a virtual cross-over cable.
33194927Sbz * This is mostly intended to be used to provide connectivity between
34194927Sbz * different virtual network stack instances.
35194927Sbz */
36194927Sbz/*
37194927Sbz * Things to re-think once we have more experience:
38194927Sbz * - ifp->if_reassign function once we can test with vimage.
39194927Sbz * - Real random etheraddrs that are checked to be uniquish;
40194927Sbz *   in case we bridge we may need this or let the user handle that case?
41194927Sbz * - netisr and callback logic.
42194927Sbz * - netisr queue lengths.
43194927Sbz */
44194927Sbz
45194927Sbz#include <sys/cdefs.h>
46194927Sbz__FBSDID("$FreeBSD: head/sys/net/if_epair.c 194927 2009-06-24 22:21:30Z bz $");
47194927Sbz
48194927Sbz#include <sys/param.h>
49194927Sbz#include <sys/kernel.h>
50194927Sbz#include <sys/mbuf.h>
51194927Sbz#include <sys/module.h>
52194927Sbz#include <sys/refcount.h>
53194927Sbz#include <sys/queue.h>
54194927Sbz#include <sys/socket.h>
55194927Sbz#include <sys/sockio.h>
56194927Sbz#include <sys/sysctl.h>
57194927Sbz#include <sys/types.h>
58194927Sbz#include <sys/vimage.h>
59194927Sbz
60194927Sbz#include <net/bpf.h>
61194927Sbz#include <net/ethernet.h>
62194927Sbz#include <net/if.h>
63194927Sbz#include <net/if_clone.h>
64194927Sbz#include <net/if_var.h>
65194927Sbz#include <net/if_types.h>
66194927Sbz#include <net/netisr.h>
67194927Sbz
#define	EPAIRNAME	"epair"

/*
 * Debug output.
 *
 * When built with DEBUG_EPAIR, DPRINTF() prints the calling function and
 * line number followed by the formatted message, but only while the
 * epair_debug variable (exported read/write below the net.link.epair
 * sysctl node) is non-zero.  Without DEBUG_EPAIR it does nothing.
 *
 * Both variants are wrapped in do { } while (0) so that "DPRINTF(...);"
 * is always exactly one statement and cannot capture a following "else".
 */
#ifdef DEBUG_EPAIR
static int epair_debug = 0;
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl");
SYSCTL_XINT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW,
    &epair_debug, 0, "if_epair(4) debugging.");
#define	DPRINTF(fmt, arg...)	do {					\
	if (epair_debug)						\
		printf("[%s:%d] " fmt, __func__, __LINE__, ##arg);	\
} while (0)
#else
#define	DPRINTF(fmt, arg...)	do { } while (0)
#endif
81194927Sbz
/*
 * Per-interface driver state.  Every epair consists of two of these,
 * cross-linked through oifp by epair_clone_create().
 */
struct epair_softc {
	/* The interface this softc is attached to (ifp->if_softc == this). */
	struct ifnet	*ifp;
	/* The other interface of the pair. */
	struct ifnet	*oifp;
	/*
	 * Initialised to 1 on creation; raised for every packet handed to
	 * netisr with this interface as rcvif and dropped again once
	 * epair_sintr() has passed the packet to if_input.
	 */
	u_int		refcount;
	/* if_qflush handler found on the ifnet before epair_qflush replaced it. */
	void		(*if_qflush)(struct ifnet *);
};
88194927Sbz
/*
 * Entry of epair_ifp_drain_list: one interface whose send queue still has
 * to be drained by epair_sintr_drained().
 */
struct epair_ifp_drain {
	STAILQ_ENTRY(epair_ifp_drain)	ifp_next;
	struct ifnet			*ifp;
};

/* Interfaces waiting to be drained; walked by epair_sintr_drained(). */
static STAILQ_HEAD(, epair_ifp_drain) epair_ifp_drain_list =
    STAILQ_HEAD_INITIALIZER(epair_ifp_drain_list);
96194927Sbz
/*
 * Remember an interface as one whose send queue has to be drained later
 * by epair_sintr_drained().  Nothing happens if the interface already is
 * on epair_ifp_drain_list.
 *
 * The list entry is allocated with M_NOWAIT: malloc(9) requires exactly
 * one of M_WAITOK/M_NOWAIT, and the users of this macro
 * (epair_start_locked() and epair_transmit_locked()) run with the epair
 * mutex held, so the allocation must not sleep.  If the allocation fails
 * the interface is silently not recorded.
 *
 * The parameter is deliberately not called `ifp': the preprocessor would
 * otherwise also substitute the member name in `elm->ifp', and the macro
 * would only work when invoked with an argument spelled exactly `ifp'.
 */
#define ADD_IFQ_FOR_DRAINING(_ifp)					\
	do {								\
		struct epair_ifp_drain *_elm = NULL;			\
									\
		STAILQ_FOREACH(_elm, &epair_ifp_drain_list, ifp_next) {	\
			if (_elm->ifp == (_ifp))			\
				break;					\
		}							\
		if (_elm == NULL) {					\
			_elm = malloc(sizeof(struct epair_ifp_drain),	\
			    M_EPAIR, M_NOWAIT | M_ZERO);		\
			if (_elm != NULL) {				\
				_elm->ifp = (_ifp);			\
				STAILQ_INSERT_TAIL(			\
				    &epair_ifp_drain_list,		\
				    _elm, ifp_next);			\
			}						\
		}							\
	} while (0)
116194927Sbz
/*
 * Our "hw" tx queue: the queue handed to netisr_register2() in
 * epair_modevent().
 */
static struct ifqueue epairinq;
/*
 * Driver-wide flags.  IFF_DRV_OACTIVE is set here when netisr_queue()
 * refused a packet and is cleared again by epair_sintr_drained().
 */
static int epair_drv_flags;

/*
 * One global mutex for the driver.  It is taken around the start,
 * transmit, qflush and drain paths.
 */
static struct mtx if_epair_mtx;
#define	EPAIR_LOCK_INIT()	mtx_init(&if_epair_mtx, "if_epair", \
				    NULL, MTX_DEF)
#define	EPAIR_LOCK_DESTROY()	mtx_destroy(&if_epair_mtx)
#define	EPAIR_LOCK_ASSERT()	mtx_assert(&if_epair_mtx, MA_OWNED)
#define	EPAIR_LOCK()		mtx_lock(&if_epair_mtx)
#define	EPAIR_UNLOCK()		mtx_unlock(&if_epair_mtx)
128194927Sbz
/* malloc(9) type used for the softcs and the drain list entries. */
static MALLOC_DEFINE(M_EPAIR, EPAIRNAME,
    "Pair of virtual cross-over connected Ethernet-like interfaces");

/* Private cloner callbacks; see "Interface cloning functions" below. */
static int epair_clone_match(struct if_clone *, const char *);
static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int epair_clone_destroy(struct if_clone *, struct ifnet *);

/* Needed by epair_sintr_drained(), which is defined before it. */
static void epair_start_locked(struct ifnet *);

/*
 * The cloner for "epair" interfaces, attached and detached in
 * epair_modevent().
 */
static struct if_clone epair_cloner = IFC_CLONE_INITIALIZER(
    EPAIRNAME, NULL, IF_MAXUNIT,
    NULL, epair_clone_match, epair_clone_create, epair_clone_destroy);
141194927Sbz
142194927Sbz
143194927Sbz/*
144194927Sbz * Netisr handler functions.
145194927Sbz */
146194927Sbzstatic void
147194927Sbzepair_sintr(struct mbuf *m)
148194927Sbz{
149194927Sbz	struct ifnet *ifp;
150194927Sbz	struct epair_softc *sc;
151194927Sbz
152194927Sbz	ifp = m->m_pkthdr.rcvif;
153194927Sbz	(*ifp->if_input)(ifp, m);
154194927Sbz	sc = ifp->if_softc;
155194927Sbz	refcount_release(&sc->refcount);
156194927Sbz	DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount);
157194927Sbz}
158194927Sbz
159194927Sbzstatic void
160194927Sbzepair_sintr_drained(void)
161194927Sbz{
162194927Sbz	struct epair_ifp_drain *elm, *tvar;
163194927Sbz	struct ifnet *ifp;
164194927Sbz
165194927Sbz	EPAIR_LOCK();
166194927Sbz	/*
167194927Sbz	 * Assume our "hw" queue and possibly ifq will be emptied
168194927Sbz	 * again. In case we will overflow the "hw" queue while
169194927Sbz	 * draining, epair_start_locked will set IFF_DRV_OACTIVE
170194927Sbz	 * again and we will stop and return.
171194927Sbz	 */
172194927Sbz	STAILQ_FOREACH_SAFE(elm, &epair_ifp_drain_list, ifp_next, tvar) {
173194927Sbz		ifp = elm->ifp;
174194927Sbz		epair_drv_flags &= ~IFF_DRV_OACTIVE;
175194927Sbz		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
176194927Sbz		epair_start_locked(ifp);
177194927Sbz
178194927Sbz		IFQ_LOCK(&ifp->if_snd);
179194927Sbz		if (IFQ_IS_EMPTY(&ifp->if_snd)) {
180194927Sbz			STAILQ_REMOVE(&epair_ifp_drain_list, elm,
181194927Sbz			    epair_ifp_drain, ifp_next);
182194927Sbz			free(elm, M_EPAIR);
183194927Sbz		}
184194927Sbz		IFQ_UNLOCK(&ifp->if_snd);
185194927Sbz
186194927Sbz		if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) {
187194927Sbz			/* Our "hw"q overflew again. */
188194927Sbz			epair_drv_flags |= IFF_DRV_OACTIVE
189194927Sbz			DPRINTF("hw queue length overflow at %u\n",
190194927Sbz			    epairinq.ifq_maxlen);
191194927Sbz#if 0
192194927Sbz			/* ``Auto-tuning.'' */
193194927Sbz			epairinq.ifq_maxlen += ifqmaxlen;
194194927Sbz#endif
195194927Sbz			break;
196194927Sbz		}
197194927Sbz	}
198194927Sbz	EPAIR_UNLOCK();
199194927Sbz}
200194927Sbz
201194927Sbz/*
202194927Sbz * Network interface (`if') related functions.
203194927Sbz */
204194927Sbzstatic void
205194927Sbzepair_start_locked(struct ifnet *ifp)
206194927Sbz{
207194927Sbz	struct mbuf *m;
208194927Sbz	struct epair_softc *sc;
209194927Sbz	struct ifnet *oifp;
210194927Sbz	int error;
211194927Sbz
212194927Sbz	EPAIR_LOCK_ASSERT();
213194927Sbz	DPRINTF("ifp=%p\n", ifp);
214194927Sbz
215194927Sbz	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
216194927Sbz		return;
217194927Sbz	if ((ifp->if_flags & IFF_UP) == 0)
218194927Sbz		return;
219194927Sbz
220194927Sbz	/*
221194927Sbz	 * We get patckets here from ether_output via if_handoff()
222194927Sbz	 * and ned to put them into the input queue of the oifp
223194927Sbz	 * and call oifp->if_input() via netisr/epair_sintr().
224194927Sbz	 */
225194927Sbz	sc = ifp->if_softc;
226194927Sbz	oifp = sc->oifp;
227194927Sbz	sc = oifp->if_softc;
228194927Sbz	for (;;) {
229194927Sbz		IFQ_DEQUEUE(&ifp->if_snd, m);
230194927Sbz		if (m == NULL)
231194927Sbz			break;
232194927Sbz		BPF_MTAP(ifp, m);
233194927Sbz
234194927Sbz		/*
235194927Sbz		 * In case the outgoing interface is not usable,
236194927Sbz		 * drop the packet.
237194927Sbz		 */
238194927Sbz		if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
239194927Sbz		    (oifp->if_flags & IFF_UP) ==0) {
240194927Sbz			ifp->if_oerrors++;
241194927Sbz			m_freem(m);
242194927Sbz			continue;
243194927Sbz		}
244194927Sbz		DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname);
245194927Sbz
246194927Sbz		/*
247194927Sbz		 * Add a reference so the interface cannot go while the
248194927Sbz		 * packet is in transit as we rely on rcvif to stay valid.
249194927Sbz		 */
250194927Sbz		refcount_acquire(&sc->refcount);
251194927Sbz		m->m_pkthdr.rcvif = oifp;
252194927Sbz		CURVNET_SET_QUIET(oifp->if_vnet);
253194927Sbz		error = netisr_queue(NETISR_EPAIR, m);
254194927Sbz		CURVNET_RESTORE();
255194927Sbz		if (!error) {
256194927Sbz			ifp->if_opackets++;
257194927Sbz			/* Someone else received the packet. */
258194927Sbz			oifp->if_ipackets++;
259194927Sbz		} else {
260194927Sbz			epair_drv_flags |= IFF_DRV_OACTIVE;
261194927Sbz			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
262194927Sbz			ADD_IFQ_FOR_DRAINING(ifp);
263194927Sbz			refcount_release(&sc->refcount);
264194927Sbz		}
265194927Sbz	}
266194927Sbz}
267194927Sbz
/*
 * if_start handler: epair_start_locked() wrapped in the epair mutex.
 */
static void
epair_start(struct ifnet *ifp)
{

	EPAIR_LOCK();
	epair_start_locked(ifp);	/* does the actual work */
	EPAIR_UNLOCK();
}
276194927Sbz
277194927Sbzstatic int
278194927Sbzepair_transmit_locked(struct ifnet *ifp, struct mbuf *m)
279194927Sbz{
280194927Sbz	struct epair_softc *sc;
281194927Sbz	struct ifnet *oifp;
282194927Sbz	int error, len;
283194927Sbz	short mflags;
284194927Sbz
285194927Sbz	EPAIR_LOCK_ASSERT();
286194927Sbz	DPRINTF("ifp=%p m=%p\n", ifp, m);
287194927Sbz
288194927Sbz	if (m == NULL)
289194927Sbz		return (0);
290194927Sbz
291194927Sbz	/*
292194927Sbz	 * We are not going to use the interface en/dequeue mechanism
293194927Sbz	 * on the TX side. We are called from ether_output_frame()
294194927Sbz	 * and will put the packet into the incoming queue of the
295194927Sbz	 * other interface of our pair via the netsir.
296194927Sbz	 */
297194927Sbz	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
298194927Sbz		m_freem(m);
299194927Sbz		return (ENXIO);
300194927Sbz	}
301194927Sbz	if ((ifp->if_flags & IFF_UP) == 0) {
302194927Sbz		m_freem(m);
303194927Sbz		return (ENETDOWN);
304194927Sbz	}
305194927Sbz
306194927Sbz	BPF_MTAP(ifp, m);
307194927Sbz
308194927Sbz	/*
309194927Sbz	 * In case the outgoing interface is not usable,
310194927Sbz	 * drop the packet.
311194927Sbz	 */
312194927Sbz	sc = ifp->if_softc;
313194927Sbz	oifp = sc->oifp;
314194927Sbz	if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
315194927Sbz	    (oifp->if_flags & IFF_UP) ==0) {
316194927Sbz		ifp->if_oerrors++;
317194927Sbz		m_freem(m);
318194927Sbz		return (0);
319194927Sbz	}
320194927Sbz	len = m->m_pkthdr.len;
321194927Sbz	mflags = m->m_flags;
322194927Sbz	DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname);
323194927Sbz
324194927Sbz#ifdef ALTQ
325194927Sbz	/* Support ALTQ via the clasic if_start() path. */
326194927Sbz	IF_LOCK(&ifp->if_snd);
327194927Sbz	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
328194927Sbz		ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error);
329194927Sbz		if (error)
330194927Sbz			ifp->if_snd.ifq_drops++;
331194927Sbz		IF_UNLOCK(&ifp->if_snd);
332194927Sbz		if (!error) {
333194927Sbz			ifp->if_obytes += len;
334194927Sbz			if (mflags & (M_BCAST|M_MCAST))
335194927Sbz				ifp->if_omcasts++;
336194927Sbz
337194927Sbz			if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0)
338194927Sbz				epair_start_locked(ifp);
339194927Sbz			else
340194927Sbz				ADD_IFQ_FOR_DRAINING(ifp);
341194927Sbz		}
342194927Sbz		return (error);
343194927Sbz	}
344194927Sbz	IF_UNLOCK(&ifp->if_snd);
345194927Sbz#endif
346194927Sbz
347194927Sbz	if ((epair_drv_flags & IFF_DRV_OACTIVE) != 0) {
348194927Sbz		/*
349194927Sbz		 * Our hardware queue is full, try to fall back
350194927Sbz		 * queuing to the ifq but do not call ifp->if_start.
351194927Sbz		 * Either we are lucky or the packet is gone.
352194927Sbz		 */
353194927Sbz		IFQ_ENQUEUE(&ifp->if_snd, m, error);
354194927Sbz		if (!error)
355194927Sbz			ADD_IFQ_FOR_DRAINING(ifp);
356194927Sbz		return (error);
357194927Sbz	}
358194927Sbz	sc = oifp->if_softc;
359194927Sbz	/*
360194927Sbz	 * Add a reference so the interface cannot go while the
361194927Sbz	 * packet is in transit as we rely on rcvif to stay valid.
362194927Sbz	 */
363194927Sbz	refcount_acquire(&sc->refcount);
364194927Sbz	m->m_pkthdr.rcvif = oifp;
365194927Sbz	CURVNET_SET_QUIET(oifp->if_vnet);
366194927Sbz	error = netisr_queue(NETISR_EPAIR, m);
367194927Sbz	CURVNET_RESTORE();
368194927Sbz	if (!error) {
369194927Sbz		ifp->if_opackets++;
370194927Sbz		/*
371194927Sbz		 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics,
372194927Sbz		 * but as we bypass all this we have to duplicate
373194927Sbz		 * the logic another time.
374194927Sbz		 */
375194927Sbz		ifp->if_obytes += len;
376194927Sbz		if (mflags & (M_BCAST|M_MCAST))
377194927Sbz			ifp->if_omcasts++;
378194927Sbz		/* Someone else received the packet. */
379194927Sbz		oifp->if_ipackets++;
380194927Sbz	} else {
381194927Sbz		/* The packet was freed already. */
382194927Sbz		refcount_release(&sc->refcount);
383194927Sbz		epair_drv_flags |= IFF_DRV_OACTIVE;
384194927Sbz		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
385194927Sbz	}
386194927Sbz
387194927Sbz	return (error);
388194927Sbz}
389194927Sbz
/*
 * if_transmit handler: epair_transmit_locked() wrapped in the epair
 * mutex.  Returns whatever epair_transmit_locked() returns.
 */
static int
epair_transmit(struct ifnet *ifp, struct mbuf *m)
{
	int rc;

	EPAIR_LOCK();
	rc = epair_transmit_locked(ifp, m);
	EPAIR_UNLOCK();

	return (rc);
}
400194927Sbz
401194927Sbzstatic void
402194927Sbzepair_qflush(struct ifnet *ifp)
403194927Sbz{
404194927Sbz	struct epair_softc *sc;
405194927Sbz	struct ifaltq *ifq;
406194927Sbz
407194927Sbz	EPAIR_LOCK();
408194927Sbz	sc = ifp->if_softc;
409194927Sbz	ifq = &ifp->if_snd;
410194927Sbz	DPRINTF("ifp=%p sc refcnt=%u ifq_len=%u\n",
411194927Sbz	    ifp, sc->refcount, ifq->ifq_len);
412194927Sbz	/*
413194927Sbz	 * Instead of calling refcount_release(&sc->refcount);
414194927Sbz	 * n times, just subtract for the cleanup.
415194927Sbz	 */
416194927Sbz	sc->refcount -= ifq->ifq_len;
417194927Sbz	EPAIR_UNLOCK();
418194927Sbz	if (sc->if_qflush)
419194927Sbz		sc->if_qflush(ifp);
420194927Sbz}
421194927Sbz
422194927Sbzstatic int
423194927Sbzepair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
424194927Sbz{
425194927Sbz	struct ifreq *ifr;
426194927Sbz	int error;
427194927Sbz
428194927Sbz	ifr = (struct ifreq *)data;
429194927Sbz	switch (cmd) {
430194927Sbz	case SIOCSIFFLAGS:
431194927Sbz	case SIOCADDMULTI:
432194927Sbz	case SIOCDELMULTI:
433194927Sbz		error = 0;
434194927Sbz		break;
435194927Sbz
436194927Sbz	default:
437194927Sbz		/* Let the common ethernet handler process this. */
438194927Sbz		error = ether_ioctl(ifp, cmd, data);
439194927Sbz		break;
440194927Sbz	}
441194927Sbz
442194927Sbz	return (error);
443194927Sbz}
444194927Sbz
445194927Sbzstatic void
446194927Sbzepair_init(void *dummy __unused)
447194927Sbz{
448194927Sbz}
449194927Sbz
450194927Sbz
451194927Sbz/*
452194927Sbz * Interface cloning functions.
453194927Sbz * We use our private ones so that we can create/destroy our secondary
454194927Sbz * device along with the primary one.
455194927Sbz */
456194927Sbzstatic int
457194927Sbzepair_clone_match(struct if_clone *ifc, const char *name)
458194927Sbz{
459194927Sbz	const char *cp;
460194927Sbz
461194927Sbz	DPRINTF("name='%s'\n", name);
462194927Sbz
463194927Sbz	/*
464194927Sbz	 * Our base name is epair.
465194927Sbz	 * Our interfaces will be named epair<n>[ab].
466194927Sbz	 * So accept anything of the following list:
467194927Sbz	 * - epair
468194927Sbz	 * - epair<n>
469194927Sbz	 * but not the epair<n>[ab] versions.
470194927Sbz	 */
471194927Sbz	if (strncmp(EPAIRNAME, name, sizeof(EPAIRNAME)-1) != 0)
472194927Sbz		return (0);
473194927Sbz
474194927Sbz	for (cp = name + sizeof(EPAIRNAME) - 1; *cp != '\0'; cp++) {
475194927Sbz		if (*cp < '0' || *cp > '9')
476194927Sbz			return (0);
477194927Sbz	}
478194927Sbz
479194927Sbz	return (1);
480194927Sbz}
481194927Sbz
482194927Sbzstatic int
483194927Sbzepair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
484194927Sbz{
485194927Sbz	struct epair_softc *sca, *scb;
486194927Sbz	struct ifnet *ifp;
487194927Sbz	char *dp;
488194927Sbz	int error, unit, wildcard;
489194927Sbz	uint8_t eaddr[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */
490194927Sbz
491194927Sbz	/*
492194927Sbz	 * We are abusing params to create our second interface.
493194927Sbz	 * Actually we already created it and called if_clone_createif()
494194927Sbz	 * for it to do the official insertion procedure the moment we knew
495194927Sbz	 * it cannot fail anymore. So just do attach it here.
496194927Sbz	 */
497194927Sbz	if (params) {
498194927Sbz		scb = (struct epair_softc *)params;
499194927Sbz		ifp = scb->ifp;
500194927Sbz		/* Assign a hopefully unique, locally administered etheraddr. */
501194927Sbz		eaddr[0] = 0x02;
502194927Sbz		eaddr[3] = (ifp->if_index >> 8) & 0xff;
503194927Sbz		eaddr[4] = ifp->if_index & 0xff;
504194927Sbz		eaddr[5] = 0x0b;
505194927Sbz		ether_ifattach(ifp, eaddr);
506194927Sbz		/* Correctly set the name for the cloner list. */
507194927Sbz		strlcpy(name, scb->ifp->if_xname, len);
508194927Sbz		return (0);
509194927Sbz	}
510194927Sbz
511194927Sbz	/* Try to see if a special unit was requested. */
512194927Sbz	error = ifc_name2unit(name, &unit);
513194927Sbz	if (error != 0)
514194927Sbz		return (error);
515194927Sbz	wildcard = (unit < 0);
516194927Sbz
517194927Sbz	error = ifc_alloc_unit(ifc, &unit);
518194927Sbz	if (error != 0)
519194927Sbz		return (error);
520194927Sbz
521194927Sbz	/*
522194927Sbz	 * If no unit had been given, we need to adjust the ifName.
523194927Sbz	 * Also make sure there is space for our extra [ab] suffix.
524194927Sbz	 */
525194927Sbz	for (dp = name; *dp != '\0'; dp++);
526194927Sbz	if (wildcard) {
527194927Sbz		error = snprintf(dp, len - (dp - name), "%d", unit);
528194927Sbz		if (error > len - (dp - name) - 1) {
529194927Sbz			/* ifName too long. */
530194927Sbz			ifc_free_unit(ifc, unit);
531194927Sbz			return (ENOSPC);
532194927Sbz		}
533194927Sbz		dp += error;
534194927Sbz	}
535194927Sbz	if (len - (dp - name) - 1 < 1) {
536194927Sbz		/* No space left for our [ab] suffix. */
537194927Sbz		ifc_free_unit(ifc, unit);
538194927Sbz		return (ENOSPC);
539194927Sbz	}
540194927Sbz	*dp = 'a';
541194927Sbz	/* Must not change dp so we can replace 'a' by 'b' later. */
542194927Sbz	*(dp+1) = '\0';
543194927Sbz
544194927Sbz	/* Allocate memory for both [ab] interfaces */
545194927Sbz	sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
546194927Sbz	refcount_init(&sca->refcount, 1);
547194927Sbz	sca->ifp = if_alloc(IFT_ETHER);
548194927Sbz	if (sca->ifp == NULL) {
549194927Sbz		free(sca, M_EPAIR);
550194927Sbz		ifc_free_unit(ifc, unit);
551194927Sbz		return (ENOSPC);
552194927Sbz	}
553194927Sbz
554194927Sbz	scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
555194927Sbz	refcount_init(&scb->refcount, 1);
556194927Sbz	scb->ifp = if_alloc(IFT_ETHER);
557194927Sbz	if (scb->ifp == NULL) {
558194927Sbz		free(scb, M_EPAIR);
559194927Sbz		if_free(sca->ifp);
560194927Sbz		free(sca, M_EPAIR);
561194927Sbz		ifc_free_unit(ifc, unit);
562194927Sbz		return (ENOSPC);
563194927Sbz	}
564194927Sbz
565194927Sbz	/*
566194927Sbz	 * Cross-reference the interfaces so we will be able to free both.
567194927Sbz	 */
568194927Sbz	sca->oifp = scb->ifp;
569194927Sbz	scb->oifp = sca->ifp;
570194927Sbz
571194927Sbz	/* Finish initialization of interface <n>a. */
572194927Sbz	ifp = sca->ifp;
573194927Sbz	ifp->if_softc = sca;
574194927Sbz	strlcpy(ifp->if_xname, name, IFNAMSIZ);
575194927Sbz	ifp->if_dname = ifc->ifc_name;
576194927Sbz	ifp->if_dunit = unit;
577194927Sbz	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
578194927Sbz	ifp->if_start = epair_start;
579194927Sbz	ifp->if_ioctl = epair_ioctl;
580194927Sbz	ifp->if_init  = epair_init;
581194927Sbz	ifp->if_snd.ifq_maxlen = ifqmaxlen;
582194927Sbz	/* Assign a hopefully unique, locally administered etheraddr. */
583194927Sbz	eaddr[0] = 0x02;
584194927Sbz	eaddr[3] = (ifp->if_index >> 8) & 0xff;
585194927Sbz	eaddr[4] = ifp->if_index & 0xff;
586194927Sbz	eaddr[5] = 0x0a;
587194927Sbz	ether_ifattach(ifp, eaddr);
588194927Sbz	sca->if_qflush = ifp->if_qflush;
589194927Sbz	ifp->if_qflush = epair_qflush;
590194927Sbz	ifp->if_transmit = epair_transmit;
591194927Sbz	ifp->if_baudrate = IF_Gbps(10UL);	/* arbitrary maximum */
592194927Sbz
593194927Sbz	/* Swap the name and finish initialization of interface <n>b. */
594194927Sbz	*dp = 'b';
595194927Sbz
596194927Sbz	ifp = scb->ifp;
597194927Sbz	ifp->if_softc = scb;
598194927Sbz	strlcpy(ifp->if_xname, name, IFNAMSIZ);
599194927Sbz	ifp->if_dname = ifc->ifc_name;
600194927Sbz	ifp->if_dunit = unit;
601194927Sbz	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
602194927Sbz	ifp->if_start = epair_start;
603194927Sbz	ifp->if_ioctl = epair_ioctl;
604194927Sbz	ifp->if_init  = epair_init;
605194927Sbz	ifp->if_snd.ifq_maxlen = ifqmaxlen;
606194927Sbz	/* We need to play some tricks here for the second interface. */
607194927Sbz	strlcpy(name, EPAIRNAME, len);
608194927Sbz	error = if_clone_create(name, len, (caddr_t)scb);
609194927Sbz	if (error)
610194927Sbz		panic("%s: if_clone_createif() for our 2nd iface failed: %d",
611194927Sbz		    __func__, error);
612194927Sbz	scb->if_qflush = ifp->if_qflush;
613194927Sbz	ifp->if_qflush = epair_qflush;
614194927Sbz	ifp->if_transmit = epair_transmit;
615194927Sbz	ifp->if_baudrate = IF_Gbps(10UL);	/* arbitrary maximum */
616194927Sbz
617194927Sbz	/*
618194927Sbz	 * Restore name to <n>a as the ifp for this will go into the
619194927Sbz	 * cloner list for the initial call.
620194927Sbz	 */
621194927Sbz	strlcpy(name, sca->ifp->if_xname, len);
622194927Sbz	DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb);
623194927Sbz
624194927Sbz	/* Tell the world, that we are ready to rock. */
625194927Sbz	sca->ifp->if_drv_flags |= IFF_DRV_RUNNING;
626194927Sbz	scb->ifp->if_drv_flags |= IFF_DRV_RUNNING;
627194927Sbz
628194927Sbz	return (0);
629194927Sbz}
630194927Sbz
631194927Sbzstatic int
632194927Sbzepair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
633194927Sbz{
634194927Sbz	struct ifnet *oifp;
635194927Sbz	struct epair_softc *sca, *scb;
636194927Sbz	int unit, error;
637194927Sbz
638194927Sbz	DPRINTF("ifp=%p\n", ifp);
639194927Sbz
640194927Sbz	/*
641194927Sbz	 * In case we called into if_clone_destroyif() ourselves
642194927Sbz	 * again to remove the second interface, the softc will be
643194927Sbz	 * NULL. In that case so not do anything but return success.
644194927Sbz	 */
645194927Sbz	if (ifp->if_softc == NULL)
646194927Sbz		return (0);
647194927Sbz
648194927Sbz	unit = ifp->if_dunit;
649194927Sbz	sca = ifp->if_softc;
650194927Sbz	oifp = sca->oifp;
651194927Sbz	scb = oifp->if_softc;
652194927Sbz
653194927Sbz	DPRINTF("ifp=%p oifp=%p\n", ifp, oifp);
654194927Sbz	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
655194927Sbz	oifp->if_drv_flags &= ~IFF_DRV_RUNNING;
656194927Sbz	ether_ifdetach(oifp);
657194927Sbz	ether_ifdetach(ifp);
658194927Sbz	/*
659194927Sbz	 * Wait for all packets to be dispatched to if_input.
660194927Sbz	 * The numbers can only go down as the interfaces are
661194927Sbz	 * detached so there is no need to use atomics.
662194927Sbz	 */
663194927Sbz	DPRINTF("sca refcnt=%u scb refcnt=%u\n", sca->refcount, scb->refcount);
664194927Sbz	KASSERT(sca->refcount == 1 && scb->refcount == 1,
665194927Sbz	    ("%s: sca->refcount!=1: %d || scb->refcount!=1: %d",
666194927Sbz	    __func__, sca->refcount, scb->refcount));
667194927Sbz
668194927Sbz	/*
669194927Sbz	 * Get rid of our second half.
670194927Sbz	 */
671194927Sbz	oifp->if_softc = NULL;
672194927Sbz	error = if_clone_destroyif(ifc, oifp);
673194927Sbz	if (error)
674194927Sbz		panic("%s: if_clone_destroyif() for our 2nd iface failed: %d",
675194927Sbz		    __func__, error);
676194927Sbz
677194927Sbz	/* Finish cleaning up. Free them and release the unit. */
678194927Sbz	if_free_type(oifp, IFT_ETHER);
679194927Sbz	if_free_type(ifp, IFT_ETHER);
680194927Sbz	free(scb, M_EPAIR);
681194927Sbz	free(sca, M_EPAIR);
682194927Sbz	ifc_free_unit(ifc, unit);
683194927Sbz
684194927Sbz	return (0);
685194927Sbz}
686194927Sbz
687194927Sbzstatic int
688194927Sbzepair_modevent(module_t mod, int type, void *data)
689194927Sbz{
690194927Sbz	int tmp;
691194927Sbz
692194927Sbz	switch (type) {
693194927Sbz	case MOD_LOAD:
694194927Sbz		/* For now limit us to one global mutex and one inq. */
695194927Sbz		EPAIR_LOCK_INIT();
696194927Sbz		epair_drv_flags = 0;
697194927Sbz		epairinq.ifq_maxlen = 16 * ifqmaxlen; /* What is a good 16? */
698194927Sbz		if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &tmp))
699194927Sbz		    epairinq.ifq_maxlen = tmp;
700194927Sbz		mtx_init(&epairinq.ifq_mtx, "epair_inq", NULL, MTX_DEF);
701194927Sbz		netisr_register2(NETISR_EPAIR, (netisr_t *)epair_sintr,
702194927Sbz		    epair_sintr_drained, &epairinq, 0);
703194927Sbz		if_clone_attach(&epair_cloner);
704194927Sbz		if (bootverbose)
705194927Sbz			printf("%s initialized.\n", EPAIRNAME);
706194927Sbz		break;
707194927Sbz	case MOD_UNLOAD:
708194927Sbz		if_clone_detach(&epair_cloner);
709194927Sbz		netisr_unregister(NETISR_EPAIR);
710194927Sbz		mtx_destroy(&epairinq.ifq_mtx);
711194927Sbz		EPAIR_LOCK_DESTROY();
712194927Sbz		if (bootverbose)
713194927Sbz			printf("%s unloaded.\n", EPAIRNAME);
714194927Sbz		break;
715194927Sbz	default:
716194927Sbz		return (EOPNOTSUPP);
717194927Sbz	}
718194927Sbz	return (0);
719194927Sbz}
720194927Sbz
/* Module description: name, event handler, no extra argument. */
static moduledata_t epair_mod = {
	"if_epair",
	epair_modevent,
	0
};

/* Register the module with the kernel and declare its version. */
DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_epair, 1);
729