if_epair.c: diff of revision 204805 against revision 209059
(Changed hunks are marked Deleted:/Added:; unchanged code appears once, with revision-209059 line numbers.)
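The entire functional change in this revision is one mechanical cleanup: three open-coded loops over the CPU IDs (a for loop up to mp_maxid with a CPU_ABSENT() test) are collapsed into the CPU_FOREACH() macro. A minimal sketch of that macro, assuming the sys/sys/smp.h definition mirrors the loop it replaces here:

#define CPU_FOREACH(i)						\
	for ((i) = 0; (i) <= mp_maxid; (i)++)		\
		if (!CPU_ABSENT((i)))

With the continue folded into an inverted if, each of the three hunks below replaces four lines with one, with no change in behavior.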
1/*-
2 * Copyright (c) 2008 The FreeBSD Foundation
3 * Copyright (c) 2009-2010 Bjoern A. Zeeb <bz@FreeBSD.org>
4 * All rights reserved.
5 *
6 * This software was developed by CK Software GmbH under sponsorship
7 * from the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31/*
32 * A pair of virtual back-to-back connected ethernet like interfaces
33 * (``two interfaces with a virtual cross-over cable'').
34 *
35 * This is mostly intended to be used to provide connectivity between
36 * different virtual network stack instances.
37 */
38/*
39 * Things to re-think once we have more experience:
40 * - ifp->if_reassign function once we can test with vimage. Depending on
41 * how if_vmove() is going to be improved.
42 * - Real random etheraddrs that are checked to be uniquish; we would need
43 * to re-do them in case we move the interface between network stacks
44 * in a private if_reassign function.
 45 * In case we bridge to a real interface/network or between independent
46 * epairs on multiple stacks/machines, we may need this.
47 * For now let the user handle that case.
48 */
49
50#include <sys/cdefs.h>
51__FBSDID("$FreeBSD: head/sys/net/if_epair.c 204805 2010-03-06 21:22:28Z bz $");
51__FBSDID("$FreeBSD: head/sys/net/if_epair.c 209059 2010-06-11 18:46:34Z jhb $");
52
53#include <sys/param.h>
54#include <sys/kernel.h>
55#include <sys/mbuf.h>
56#include <sys/module.h>
57#include <sys/refcount.h>
58#include <sys/queue.h>
59#include <sys/smp.h>
60#include <sys/socket.h>
61#include <sys/sockio.h>
62#include <sys/sysctl.h>
63#include <sys/types.h>
64
65#include <net/bpf.h>
66#include <net/ethernet.h>
67#include <net/if.h>
68#include <net/if_clone.h>
69#include <net/if_var.h>
70#include <net/if_types.h>
71#include <net/netisr.h>
72#include <net/vnet.h>
73
74#define EPAIRNAME "epair"
75
76SYSCTL_DECL(_net_link);
77SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl");
78
79#ifdef EPAIR_DEBUG
80static int epair_debug = 0;
81SYSCTL_XINT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW,
82 &epair_debug, 0, "if_epair(4) debugging.");
83#define DPRINTF(fmt, arg...) \
84 if (epair_debug) \
85 printf("[%s:%d] " fmt, __func__, __LINE__, ##arg)
86#else
87#define DPRINTF(fmt, arg...)
88#endif
89
90static void epair_nh_sintr(struct mbuf *);
91static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *);
92static void epair_nh_drainedcpu(u_int);
93
94static void epair_start_locked(struct ifnet *);
95
96static int epair_clone_match(struct if_clone *, const char *);
97static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t);
98static int epair_clone_destroy(struct if_clone *, struct ifnet *);
99
100/* Netisr related definitions and sysctl. */
101static struct netisr_handler epair_nh = {
102 .nh_name = EPAIRNAME,
103 .nh_proto = NETISR_EPAIR,
104 .nh_policy = NETISR_POLICY_CPU,
105 .nh_handler = epair_nh_sintr,
106 .nh_m2cpuid = epair_nh_m2cpuid,
107 .nh_drainedcpu = epair_nh_drainedcpu,
108};
109
110static int
111sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
112{
113 int error, qlimit;
114
115 netisr_getqlimit(&epair_nh, &qlimit);
116 error = sysctl_handle_int(oidp, &qlimit, 0, req);
117 if (error || !req->newptr)
118 return (error);
119 if (qlimit < 1)
120 return (EINVAL);
121 return (netisr_setqlimit(&epair_nh, qlimit));
122}
123SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
124 0, 0, sysctl_epair_netisr_maxqlen, "I",
125 "Maximum if_epair(4) netisr \"hw\" queue length");
126
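/*
 * Usage sketch (assumed userland invocation; not part of this file):
 * sysctl_epair_netisr_maxqlen() above is the usual read-modify sysctl
 * handler, so the queue limit can be read or tuned with e.g.
 *
 *	sysctl net.link.epair.netisr_maxqlen
 *	sysctl net.link.epair.netisr_maxqlen=512
 *
 * Values below 1 are rejected with EINVAL.
 */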
127struct epair_softc {
128 struct ifnet *ifp; /* This ifp. */
129 struct ifnet *oifp; /* other ifp of pair. */
130 u_int refcount; /* # of mbufs in flight. */
131 u_int cpuid; /* CPU ID assigned upon creation. */
132 void (*if_qflush)(struct ifnet *);
133 /* Original if_qflush routine. */
134};
135
136/*
137 * Per-CPU list of ifps with data in the ifq that needs to be flushed
138 * to the netisr ``hw'' queue before we allow any further direct queuing
139 * to the ``hw'' queue.
140 */
141struct epair_ifp_drain {
142 STAILQ_ENTRY(epair_ifp_drain) ifp_next;
143 struct ifnet *ifp;
144};
145STAILQ_HEAD(eid_list, epair_ifp_drain);
146
147#define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \
148 "if_epair", NULL, MTX_DEF)
149#define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx)
150#define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \
151 MA_OWNED)
152#define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx)
153#define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx)
154
155#ifdef INVARIANTS
156#define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v))
157#define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r))
158#define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r))
159#define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p)
160#else
161#define EPAIR_REFCOUNT_INIT(r, v)
162#define EPAIR_REFCOUNT_AQUIRE(r)
163#define EPAIR_REFCOUNT_RELEASE(r)
164#define EPAIR_REFCOUNT_ASSERT(a, p)
165#endif
166
167static MALLOC_DEFINE(M_EPAIR, EPAIRNAME,
168 "Pair of virtual cross-over connected Ethernet-like interfaces");
169
170static struct if_clone epair_cloner = IFC_CLONE_INITIALIZER(
171 EPAIRNAME, NULL, IF_MAXUNIT,
172 NULL, epair_clone_match, epair_clone_create, epair_clone_destroy);
173
174/*
175 * DPCPU area and functions.
176 */
177struct epair_dpcpu {
178 struct mtx if_epair_mtx; /* Per-CPU locking. */
179 int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */
180 struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with
181 * data in the ifq. */
182};
183DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu);
184
185static void
186epair_dpcpu_init(void)
187{
188 struct epair_dpcpu *epair_dpcpu;
189 struct eid_list *s;
190 u_int cpuid;
191
192 for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
193 if (CPU_ABSENT(cpuid))
194 continue;
195
192 CPU_FOREACH(cpuid) {
193 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
194
195 /* Initialize per-cpu lock. */
196 EPAIR_LOCK_INIT(epair_dpcpu);
197
198 /* Driver flags are per-cpu as are our netisr "hw" queues. */
199 epair_dpcpu->epair_drv_flags = 0;
200
201 /*
202 * Initialize per-cpu drain list.
203 * Manually do what STAILQ_HEAD_INITIALIZER would do.
204 */
205 s = &epair_dpcpu->epair_ifp_drain_list;
206 s->stqh_first = NULL;
207 s->stqh_last = &s->stqh_first;
208 }
209}
210
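/*
 * Note on the manual list setup above: assuming STAILQ_INIT() from
 * <sys/queue.h> performs exactly those two stores, the hand-rolled
 * initialization is equivalent to
 *
 *	STAILQ_INIT(&epair_dpcpu->epair_ifp_drain_list);
 */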
211static void
212epair_dpcpu_detach(void)
213{
214 struct epair_dpcpu *epair_dpcpu;
215 u_int cpuid;
216
Deleted:
220 for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
221 if (CPU_ABSENT(cpuid))
222 continue;
223
Added:
217 CPU_FOREACH(cpuid) {
218 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
219
220 /* Destroy per-cpu lock. */
221 EPAIR_LOCK_DESTROY(epair_dpcpu);
222 }
223}
224
225/*
226 * Helper functions.
227 */
228static u_int
229cpuid_from_ifp(struct ifnet *ifp)
230{
231 struct epair_softc *sc;
232
233 if (ifp == NULL)
234 return (0);
235 sc = ifp->if_softc;
236
237 return (sc->cpuid);
238}
239
240/*
241 * Netisr handler functions.
242 */
243static void
244epair_nh_sintr(struct mbuf *m)
245{
246 struct ifnet *ifp;
247 struct epair_softc *sc;
248
249 ifp = m->m_pkthdr.rcvif;
250 (*ifp->if_input)(ifp, m);
251 sc = ifp->if_softc;
252 EPAIR_REFCOUNT_RELEASE(&sc->refcount);
253 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
254 ("%s: ifp=%p sc->refcount not >= 1: %d",
255 __func__, ifp, sc->refcount));
256 DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount);
257}
258
259static struct mbuf *
260epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid)
261{
262
263 *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif);
264
265 return (m);
266}
267
268static void
269epair_nh_drainedcpu(u_int cpuid)
270{
271 struct epair_dpcpu *epair_dpcpu;
272 struct epair_ifp_drain *elm, *tvar;
273 struct ifnet *ifp;
274
275 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
276 EPAIR_LOCK(epair_dpcpu);
277 /*
278 * Assume our "hw" queue and possibly ifq will be emptied
279 * again. If we overflow the "hw" queue while
280 * draining, epair_start_locked will set IFF_DRV_OACTIVE
281 * again and we will stop and return.
282 */
283 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list,
284 ifp_next, tvar) {
285 ifp = elm->ifp;
286 epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE;
287 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
288 epair_start_locked(ifp);
289
290 IFQ_LOCK(&ifp->if_snd);
291 if (IFQ_IS_EMPTY(&ifp->if_snd)) {
292 struct epair_softc *sc;
293
294 STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list,
295 elm, epair_ifp_drain, ifp_next);
296 /* The cached ifp goes off the list. */
297 sc = ifp->if_softc;
298 EPAIR_REFCOUNT_RELEASE(&sc->refcount);
299 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
300 ("%s: ifp=%p sc->refcount not >= 1: %d",
301 __func__, ifp, sc->refcount));
302 free(elm, M_EPAIR);
303 }
304 IFQ_UNLOCK(&ifp->if_snd);
305
306 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) {
307 /* Our "hw"q overflowed again. */
308 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE;
309 DPRINTF("hw queue length overflow at %u\n",
310 epair_nh.nh_qlimit);
311 break;
312 }
313 }
314 EPAIR_UNLOCK(epair_dpcpu);
315}
316
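/*
 * Summary of the backpressure protocol implemented above and in
 * epair_start_locked()/epair_transmit_locked() further down: when
 * netisr_queue() fails, the sender sets IFF_DRV_OACTIVE (per interface
 * and per CPU) and parks the ifp on the per-CPU drain list; once netisr
 * reports the "hw" queue drained, this callback clears the flags,
 * restarts each parked interface, and removes it from the list when its
 * ifq is empty again.
 */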
317/*
318 * Network interface (`if') related functions.
319 */
320static void
321epair_remove_ifp_from_draining(struct ifnet *ifp)
322{
323 struct epair_dpcpu *epair_dpcpu;
324 struct epair_ifp_drain *elm, *tvar;
325 u_int cpuid;
326
Deleted:
333 for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
334 if (CPU_ABSENT(cpuid))
335 continue;
336
Added:
327 CPU_FOREACH(cpuid) {
328 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu);
329 EPAIR_LOCK(epair_dpcpu);
330 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list,
331 ifp_next, tvar) {
332 if (ifp == elm->ifp) {
333 struct epair_softc *sc;
334
335 STAILQ_REMOVE(
336 &epair_dpcpu->epair_ifp_drain_list, elm,
337 epair_ifp_drain, ifp_next);
338 /* The cached ifp goes off the list. */
339 sc = ifp->if_softc;
340 EPAIR_REFCOUNT_RELEASE(&sc->refcount);
341 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
342 ("%s: ifp=%p sc->refcount not >= 1: %d",
343 __func__, ifp, sc->refcount));
344 free(elm, M_EPAIR);
345 }
346 }
347 EPAIR_UNLOCK(epair_dpcpu);
348 }
349}
350
351static int
352epair_add_ifp_for_draining(struct ifnet *ifp)
353{
354 struct epair_dpcpu *epair_dpcpu;
355 struct epair_softc *sc;
356 struct epair_ifp_drain *elm = NULL;
357
358 sc = ifp->if_softc;
359 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu);
360 EPAIR_LOCK_ASSERT(epair_dpcpu);
361 STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next)
362 if (elm->ifp == ifp)
363 break;
364 /* If the ifp is there already, return success. */
365 if (elm != NULL)
366 return (0);
367
368 elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO);
369 if (elm == NULL)
370 return (ENOMEM);
371
372 elm->ifp = ifp;
373 /* Add a reference for the ifp pointer on the list. */
374 EPAIR_REFCOUNT_AQUIRE(&sc->refcount);
375 STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next);
376
377 return (0);
378}
379
380static void
381epair_start_locked(struct ifnet *ifp)
382{
383 struct epair_dpcpu *epair_dpcpu;
384 struct mbuf *m;
385 struct epair_softc *sc;
386 struct ifnet *oifp;
387 int error;
388
389 DPRINTF("ifp=%p\n", ifp);
390 sc = ifp->if_softc;
391 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu);
392 EPAIR_LOCK_ASSERT(epair_dpcpu);
393
394 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
395 return;
396 if ((ifp->if_flags & IFF_UP) == 0)
397 return;
398
399 /*
400 * We get packets here from ether_output via if_handoff()
401 * and need to put them into the input queue of the oifp
402 * and call oifp->if_input() via netisr/epair_sintr().
403 */
404 oifp = sc->oifp;
405 sc = oifp->if_softc;
406 for (;;) {
407 IFQ_DEQUEUE(&ifp->if_snd, m);
408 if (m == NULL)
409 break;
410 BPF_MTAP(ifp, m);
411
412 /*
413 * In case the outgoing interface is not usable,
414 * drop the packet.
415 */
416 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
417 (oifp->if_flags & IFF_UP) ==0) {
418 ifp->if_oerrors++;
419 m_freem(m);
420 continue;
421 }
422 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname);
423
424 /*
425 * Add a reference so the interface cannot go away while the
426 * packet is in transit as we rely on rcvif to stay valid.
427 */
428 EPAIR_REFCOUNT_AQUIRE(&sc->refcount);
429 m->m_pkthdr.rcvif = oifp;
430 CURVNET_SET_QUIET(oifp->if_vnet);
431 error = netisr_queue(NETISR_EPAIR, m);
432 CURVNET_RESTORE();
433 if (!error) {
434 ifp->if_opackets++;
435 /* Someone else received the packet. */
436 oifp->if_ipackets++;
437 } else {
438 /* The packet was freed already. */
439 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE;
440 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
441 (void) epair_add_ifp_for_draining(ifp);
442 ifp->if_oerrors++;
443 EPAIR_REFCOUNT_RELEASE(&sc->refcount);
444 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
445 ("%s: ifp=%p sc->refcount not >= 1: %d",
446 __func__, oifp, sc->refcount));
447 }
448 }
449}
450
451static void
452epair_start(struct ifnet *ifp)
453{
454 struct epair_dpcpu *epair_dpcpu;
455
456 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu);
457 EPAIR_LOCK(epair_dpcpu);
458 epair_start_locked(ifp);
459 EPAIR_UNLOCK(epair_dpcpu);
460}
461
462static int
463epair_transmit_locked(struct ifnet *ifp, struct mbuf *m)
464{
465 struct epair_dpcpu *epair_dpcpu;
466 struct epair_softc *sc;
467 struct ifnet *oifp;
468 int error, len;
469 short mflags;
470
471 DPRINTF("ifp=%p m=%p\n", ifp, m);
472 sc = ifp->if_softc;
473 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu);
474 EPAIR_LOCK_ASSERT(epair_dpcpu);
475
476 if (m == NULL)
477 return (0);
478
479 /*
480 * We are not going to use the interface en/dequeue mechanism
481 * on the TX side. We are called from ether_output_frame()
482 * and will put the packet into the incoming queue of the
483 * other interface of our pair via the netisr.
484 */
485 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
486 m_freem(m);
487 return (ENXIO);
488 }
489 if ((ifp->if_flags & IFF_UP) == 0) {
490 m_freem(m);
491 return (ENETDOWN);
492 }
493
494 BPF_MTAP(ifp, m);
495
496 /*
497 * In case the outgoing interface is not usable,
498 * drop the packet.
499 */
500 oifp = sc->oifp;
501 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
502 (oifp->if_flags & IFF_UP) ==0) {
503 ifp->if_oerrors++;
504 m_freem(m);
505 return (0);
506 }
507 len = m->m_pkthdr.len;
508 mflags = m->m_flags;
509 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname);
510
511#ifdef ALTQ
512 /* Support ALTQ via the classic if_start() path. */
513 IF_LOCK(&ifp->if_snd);
514 if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
515 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error);
516 if (error)
517 ifp->if_snd.ifq_drops++;
518 IF_UNLOCK(&ifp->if_snd);
519 if (!error) {
520 ifp->if_obytes += len;
521 if (mflags & (M_BCAST|M_MCAST))
522 ifp->if_omcasts++;
523
524 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0)
525 epair_start_locked(ifp);
526 else
527 (void)epair_add_ifp_for_draining(ifp);
528 }
529 return (error);
530 }
531 IF_UNLOCK(&ifp->if_snd);
532#endif
533
534 if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) {
535 /*
536 * Our hardware queue is full; try to fall back to
537 * queuing on the ifq but do not call ifp->if_start.
538 * Either we are lucky or the packet is gone.
539 */
540 IFQ_ENQUEUE(&ifp->if_snd, m, error);
541 if (!error)
542 (void)epair_add_ifp_for_draining(ifp);
543 return (error);
544 }
545 sc = oifp->if_softc;
546 /*
547 * Add a reference so the interface cannot go away while the
548 * packet is in transit as we rely on rcvif to stay valid.
549 */
550 EPAIR_REFCOUNT_AQUIRE(&sc->refcount);
551 m->m_pkthdr.rcvif = oifp;
552 CURVNET_SET_QUIET(oifp->if_vnet);
553 error = netisr_queue(NETISR_EPAIR, m);
554 CURVNET_RESTORE();
555 if (!error) {
556 ifp->if_opackets++;
557 /*
558 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics,
559 * but as we bypass all this we have to duplicate
560 * the logic here once more.
561 */
562 ifp->if_obytes += len;
563 if (mflags & (M_BCAST|M_MCAST))
564 ifp->if_omcasts++;
565 /* Someone else received the packet. */
566 oifp->if_ipackets++;
567 } else {
568 /* The packet was freed already. */
569 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE;
570 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
571 ifp->if_oerrors++;
572 EPAIR_REFCOUNT_RELEASE(&sc->refcount);
573 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
574 ("%s: ifp=%p sc->refcount not >= 1: %d",
575 __func__, oifp, sc->refcount));
576 }
577
578 return (error);
579}
580
581static int
582epair_transmit(struct ifnet *ifp, struct mbuf *m)
583{
584 struct epair_dpcpu *epair_dpcpu;
585 int error;
586
587 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu);
588 EPAIR_LOCK(epair_dpcpu);
589 error = epair_transmit_locked(ifp, m);
590 EPAIR_UNLOCK(epair_dpcpu);
591 return (error);
592}
593
594static void
595epair_qflush(struct ifnet *ifp)
596{
597 struct epair_softc *sc;
598
599 sc = ifp->if_softc;
600 KASSERT(sc != NULL, ("%s: ifp=%p, epair_softc gone? sc=%p\n",
601 __func__, ifp, sc));
602 /*
603 * Remove this ifp from all backpointer lists. The interface will not
604 * be usable for flushing anyway, nor should it have anything to flush
605 * after if_qflush().
606 */
607 epair_remove_ifp_from_draining(ifp);
608
609 if (sc->if_qflush)
610 sc->if_qflush(ifp);
611}
612
613static int
614epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
615{
616 struct ifreq *ifr;
617 int error;
618
619 ifr = (struct ifreq *)data;
620 switch (cmd) {
621 case SIOCSIFFLAGS:
622 case SIOCADDMULTI:
623 case SIOCDELMULTI:
624 error = 0;
625 break;
626
627 case SIOCSIFMTU:
628 /* We basically allow all kinds of MTUs. */
629 ifp->if_mtu = ifr->ifr_mtu;
630 error = 0;
631 break;
632
633 default:
634 /* Let the common ethernet handler process this. */
635 error = ether_ioctl(ifp, cmd, data);
636 break;
637 }
638
639 return (error);
640}
641
642static void
643epair_init(void *dummy __unused)
644{
645}
646
647
648/*
649 * Interface cloning functions.
650 * We use our private ones so that we can create/destroy our secondary
651 * device along with the primary one.
652 */
653static int
654epair_clone_match(struct if_clone *ifc, const char *name)
655{
656 const char *cp;
657
658 DPRINTF("name='%s'\n", name);
659
660 /*
661 * Our base name is epair.
662 * Our interfaces will be named epair<n>[ab].
663 * So accept anything of the following list:
664 * - epair
665 * - epair<n>
666 * but not the epair<n>[ab] versions.
667 */
668 if (strncmp(EPAIRNAME, name, sizeof(EPAIRNAME)-1) != 0)
669 return (0);
670
671 for (cp = name + sizeof(EPAIRNAME) - 1; *cp != '\0'; cp++) {
672 if (*cp < '0' || *cp > '9')
673 return (0);
674 }
675
676 return (1);
677}
678
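/*
 * Illustrative matches for the rule above: "epair" and "epair7" are
 * accepted; "epair7a", "epair7b", and "epairX" are rejected.
 */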
679static int
680epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
681{
682 struct epair_softc *sca, *scb;
683 struct ifnet *ifp;
684 char *dp;
685 int error, unit, wildcard;
686 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */
687
688 /*
689 * We are abusing params to create our second interface.
690 * Actually we already created it and called if_clone_createif()
691 * for it to do the official insertion procedure the moment we knew
692 * it cannot fail anymore. So just attach it here.
693 */
694 if (params) {
695 scb = (struct epair_softc *)params;
696 ifp = scb->ifp;
697 /* Assign a hopefully unique, locally administered etheraddr. */
698 eaddr[0] = 0x02;
699 eaddr[3] = (ifp->if_index >> 8) & 0xff;
700 eaddr[4] = ifp->if_index & 0xff;
701 eaddr[5] = 0x0b;
702 ether_ifattach(ifp, eaddr);
703 /* Correctly set the name for the cloner list. */
704 strlcpy(name, scb->ifp->if_xname, len);
705 return (0);
706 }
707
708 /* Try to see if a special unit was requested. */
709 error = ifc_name2unit(name, &unit);
710 if (error != 0)
711 return (error);
712 wildcard = (unit < 0);
713
714 error = ifc_alloc_unit(ifc, &unit);
715 if (error != 0)
716 return (error);
717
718 /*
719 * If no unit had been given, we need to adjust the ifName.
720 * Also make sure there is space for our extra [ab] suffix.
721 */
722 for (dp = name; *dp != '\0'; dp++);
723 if (wildcard) {
724 error = snprintf(dp, len - (dp - name), "%d", unit);
725 if (error > len - (dp - name) - 1) {
726 /* ifName too long. */
727 ifc_free_unit(ifc, unit);
728 return (ENOSPC);
729 }
730 dp += error;
731 }
732 if (len - (dp - name) - 1 < 1) {
733 /* No space left for our [ab] suffix. */
734 ifc_free_unit(ifc, unit);
735 return (ENOSPC);
736 }
737 *dp = 'a';
738 /* Must not change dp so we can replace 'a' by 'b' later. */
739 *(dp+1) = '\0';
740
741 /* Allocate memory for both [ab] interfaces */
742 sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
743 EPAIR_REFCOUNT_INIT(&sca->refcount, 1);
744 sca->ifp = if_alloc(IFT_ETHER);
745 if (sca->ifp == NULL) {
746 free(sca, M_EPAIR);
747 ifc_free_unit(ifc, unit);
748 return (ENOSPC);
749 }
750
751 scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
752 EPAIR_REFCOUNT_INIT(&scb->refcount, 1);
753 scb->ifp = if_alloc(IFT_ETHER);
754 if (scb->ifp == NULL) {
755 free(scb, M_EPAIR);
756 if_free(sca->ifp);
757 free(sca, M_EPAIR);
758 ifc_free_unit(ifc, unit);
759 return (ENOSPC);
760 }
761
762 /*
763 * Cross-reference the interfaces so we will be able to free both.
764 */
765 sca->oifp = scb->ifp;
766 scb->oifp = sca->ifp;
767
768 /*
769 * Calculate the cpuid for netisr queueing based on the
770 * ifIndex of the interfaces. As long as we cannot configure
771 * this or use cpuset information easily we cannot guarantee
772 * cache locality but we can at least allow parallelism.
773 */
774 sca->cpuid =
775 netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount());
776 scb->cpuid =
777 netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount());
778
779 /* Finish initialization of interface <n>a. */
780 ifp = sca->ifp;
781 ifp->if_softc = sca;
782 strlcpy(ifp->if_xname, name, IFNAMSIZ);
783 ifp->if_dname = ifc->ifc_name;
784 ifp->if_dunit = unit;
785 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
786 ifp->if_start = epair_start;
787 ifp->if_ioctl = epair_ioctl;
788 ifp->if_init = epair_init;
789 ifp->if_snd.ifq_maxlen = ifqmaxlen;
790 /* Assign a hopefully unique, locally administered etheraddr. */
791 eaddr[0] = 0x02;
792 eaddr[3] = (ifp->if_index >> 8) & 0xff;
793 eaddr[4] = ifp->if_index & 0xff;
794 eaddr[5] = 0x0a;
795 ether_ifattach(ifp, eaddr);
796 sca->if_qflush = ifp->if_qflush;
797 ifp->if_qflush = epair_qflush;
798 ifp->if_transmit = epair_transmit;
799 ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */
800
801 /* Swap the name and finish initialization of interface <n>b. */
802 *dp = 'b';
803
804 ifp = scb->ifp;
805 ifp->if_softc = scb;
806 strlcpy(ifp->if_xname, name, IFNAMSIZ);
807 ifp->if_dname = ifc->ifc_name;
808 ifp->if_dunit = unit;
809 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
810 ifp->if_start = epair_start;
811 ifp->if_ioctl = epair_ioctl;
812 ifp->if_init = epair_init;
813 ifp->if_snd.ifq_maxlen = ifqmaxlen;
814 /* We need to play some tricks here for the second interface. */
815 strlcpy(name, EPAIRNAME, len);
816 error = if_clone_create(name, len, (caddr_t)scb);
817 if (error)
818 panic("%s: if_clone_createif() for our 2nd iface failed: %d",
819 __func__, error);
820 scb->if_qflush = ifp->if_qflush;
821 ifp->if_qflush = epair_qflush;
822 ifp->if_transmit = epair_transmit;
823 ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */
824
825 /*
826 * Restore name to <n>a as the ifp for this will go into the
827 * cloner list for the initial call.
828 */
829 strlcpy(name, sca->ifp->if_xname, len);
830 DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb);
831
832 /* Tell the world that we are ready to rock. */
833 sca->ifp->if_drv_flags |= IFF_DRV_RUNNING;
834 scb->ifp->if_drv_flags |= IFF_DRV_RUNNING;
835
836 return (0);
837}
838
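/*
 * Worked example of the etheraddr scheme in epair_clone_create() above
 * (an illustration, not code from this file): 0x02 in byte 0 marks a
 * locally administered unicast address, bytes 3 and 4 carry if_index,
 * and the final byte tells the two halves apart. An "a" interface with
 * if_index 258 (0x0102) thus ends in ...:01:02:0a, while its "b" peer,
 * using its own if_index, ends in :0b.
 */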
839static int
840epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
841{
842 struct ifnet *oifp;
843 struct epair_softc *sca, *scb;
844 int unit, error;
845
846 DPRINTF("ifp=%p\n", ifp);
847
848 /*
849 * In case we called into if_clone_destroyif() ourselves
850 * again to remove the second interface, the softc will be
851 * NULL. In that case do not do anything but return success.
852 */
853 if (ifp->if_softc == NULL)
854 return (0);
855
856 unit = ifp->if_dunit;
857 sca = ifp->if_softc;
858 oifp = sca->oifp;
859 scb = oifp->if_softc;
860
861 DPRINTF("ifp=%p oifp=%p\n", ifp, oifp);
862 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
863 oifp->if_drv_flags &= ~IFF_DRV_RUNNING;
864 ether_ifdetach(oifp);
865 ether_ifdetach(ifp);
866 /*
867 * Wait for all packets to be dispatched to if_input.
868 * The numbers can only go down as the interfaces are
869 * detached so there is no need to use atomics.
870 */
871 DPRINTF("sca refcnt=%u scb refcnt=%u\n", sca->refcount, scb->refcount);
872 EPAIR_REFCOUNT_ASSERT(sca->refcount == 1 && scb->refcount == 1,
873 ("%s: ifp=%p sca->refcount!=1: %d || ifp=%p scb->refcount!=1: %d",
874 __func__, ifp, sca->refcount, oifp, scb->refcount));
875
876 /*
877 * Get rid of our second half.
878 */
879 oifp->if_softc = NULL;
880 error = if_clone_destroyif(ifc, oifp);
881 if (error)
882 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d",
883 __func__, error);
884
885 /*
886 * Finish cleaning up. Free them and release the unit.
887 * As the other of the two interfaces may reside in a different vnet,
888 * we need to switch before freeing them.
889 */
890 CURVNET_SET_QUIET(oifp->if_vnet);
891 if_free_type(oifp, IFT_ETHER);
892 CURVNET_RESTORE();
893 if_free_type(ifp, IFT_ETHER);
894 free(scb, M_EPAIR);
895 free(sca, M_EPAIR);
896 ifc_free_unit(ifc, unit);
897
898 return (0);
899}
900
901static int
902epair_modevent(module_t mod, int type, void *data)
903{
904 int qlimit;
905
906 switch (type) {
907 case MOD_LOAD:
908 /* For now limit us to one global mutex and one inq. */
909 epair_dpcpu_init();
910 epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */
911 if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit))
912 epair_nh.nh_qlimit = qlimit;
913 netisr_register(&epair_nh);
914 if_clone_attach(&epair_cloner);
915 if (bootverbose)
916 printf("%s initialized.\n", EPAIRNAME);
917 break;
918 case MOD_UNLOAD:
919 if_clone_detach(&epair_cloner);
920 netisr_unregister(&epair_nh);
921 epair_dpcpu_detach();
922 if (bootverbose)
923 printf("%s unloaded.\n", EPAIRNAME);
924 break;
925 default:
926 return (EOPNOTSUPP);
927 }
928 return (0);
929}
930
931static moduledata_t epair_mod = {
932 "if_epair",
933 epair_modevent,
934 0
935};
936
937DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
938MODULE_VERSION(if_epair, 1);
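Usage note (the standard epair(4) workflow, assumed rather than shown by this diff): after "kldload if_epair", a single "ifconfig epair create" clones both halves at once, e.g. epair0a and epair0b; either half can then be moved into another vnet, giving the connectivity between virtual network stacks that the header comment describes.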