/* if_epair.c revision 196019 */
1194927Sbz/*- 2194927Sbz * Copyright (c) 2008 The FreeBSD Foundation 3195892Sbz * Copyright (c) 2009 Bjoern A. Zeeb <bz@FreeBSD.org> 4194927Sbz * All rights reserved. 5194927Sbz * 6194927Sbz * This software was developed by CK Software GmbH under sponsorship 7194927Sbz * from the FreeBSD Foundation. 8194927Sbz * 9194927Sbz * Redistribution and use in source and binary forms, with or without 10194927Sbz * modification, are permitted provided that the following conditions 11194927Sbz * are met: 12194927Sbz * 1. Redistributions of source code must retain the above copyright 13194927Sbz * notice, this list of conditions and the following disclaimer. 14194927Sbz * 2. Redistributions in binary form must reproduce the above copyright 15194927Sbz * notice, this list of conditions and the following disclaimer in the 16194927Sbz * documentation and/or other materials provided with the distribution. 17194927Sbz * 18194927Sbz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19194927Sbz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20194927Sbz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21194927Sbz * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22194927Sbz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23194927Sbz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24194927Sbz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25194927Sbz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26194927Sbz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27194927Sbz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28194927Sbz * SUCH DAMAGE. 29194927Sbz */ 30194927Sbz 31194927Sbz/* 32195892Sbz * A pair of virtual back-to-back connected ethernet like interfaces 33195892Sbz * (``two interfaces with a virtual cross-over cable''). 
34195892Sbz * 35194927Sbz * This is mostly intended to be used to provide connectivity between 36194927Sbz * different virtual network stack instances. 37194927Sbz */ 38194927Sbz/* 39194927Sbz * Things to re-think once we have more experience: 40195892Sbz * - ifp->if_reassign function once we can test with vimage. Depending on 41196019Srwatson * how if_vmove() is going to be improved. 42195892Sbz * - Real random etheraddrs that are checked to be uniquish; we would need 43195892Sbz * to re-do them in case we move the interface between network stacks 44195892Sbz * in a private if_reassign function. 45195892Sbz * In case we bridge to a real interface/network or between indepedent 46195892Sbz * epairs on multiple stacks/machines, we may need this. 47195892Sbz * For now let the user handle that case. 48194927Sbz */ 49194927Sbz 50194927Sbz#include <sys/cdefs.h> 51194927Sbz__FBSDID("$FreeBSD: head/sys/net/if_epair.c 196019 2009-08-01 19:26:27Z rwatson $"); 52194927Sbz 53194927Sbz#include <sys/param.h> 54194927Sbz#include <sys/kernel.h> 55194927Sbz#include <sys/mbuf.h> 56194927Sbz#include <sys/module.h> 57194927Sbz#include <sys/refcount.h> 58194927Sbz#include <sys/queue.h> 59195892Sbz#include <sys/smp.h> 60194927Sbz#include <sys/socket.h> 61194927Sbz#include <sys/sockio.h> 62194927Sbz#include <sys/sysctl.h> 63194927Sbz#include <sys/types.h> 64194927Sbz 65194927Sbz#include <net/bpf.h> 66194927Sbz#include <net/ethernet.h> 67194927Sbz#include <net/if.h> 68194927Sbz#include <net/if_clone.h> 69194927Sbz#include <net/if_var.h> 70194927Sbz#include <net/if_types.h> 71194927Sbz#include <net/netisr.h> 72196019Srwatson#include <net/vnet.h> 73194927Sbz 74194927Sbz#define EPAIRNAME "epair" 75194927Sbz 76194927SbzSYSCTL_DECL(_net_link); 77194927SbzSYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); 78195892Sbz 79195892Sbz#ifdef EPAIR_DEBUG 80195892Sbzstatic int epair_debug = 0; 81194927SbzSYSCTL_XINT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW, 82194927Sbz 
&epair_debug, 0, "if_epair(4) debugging."); 83195892Sbz#define DPRINTF(fmt, arg...) \ 84195892Sbz if (epair_debug) \ 85195892Sbz printf("[%s:%d] " fmt, __func__, __LINE__, ##arg) 86194927Sbz#else 87194927Sbz#define DPRINTF(fmt, arg...) 88194927Sbz#endif 89194927Sbz 90195892Sbzstatic void epair_nh_sintr(struct mbuf *); 91195892Sbzstatic struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *); 92195892Sbzstatic void epair_nh_drainedcpu(u_int); 93195892Sbz 94195892Sbzstatic void epair_start_locked(struct ifnet *); 95195892Sbz 96195892Sbzstatic int epair_clone_match(struct if_clone *, const char *); 97195892Sbzstatic int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); 98195892Sbzstatic int epair_clone_destroy(struct if_clone *, struct ifnet *); 99195892Sbz 100195892Sbz/* Netisr realted definitions and sysctl. */ 101195892Sbzstatic struct netisr_handler epair_nh = { 102195892Sbz .nh_name = EPAIRNAME, 103195892Sbz .nh_proto = NETISR_EPAIR, 104195892Sbz .nh_policy = NETISR_POLICY_CPU, 105195892Sbz .nh_handler = epair_nh_sintr, 106195892Sbz .nh_m2cpuid = epair_nh_m2cpuid, 107195892Sbz .nh_drainedcpu = epair_nh_drainedcpu, 108195892Sbz}; 109195892Sbz 110195892Sbzstatic int 111195892Sbzsysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS) 112195892Sbz{ 113195892Sbz int error, qlimit; 114195892Sbz 115195892Sbz netisr_getqlimit(&epair_nh, &qlimit); 116195892Sbz error = sysctl_handle_int(oidp, &qlimit, 0, req); 117195892Sbz if (error || !req->newptr) 118195892Sbz return (error); 119195892Sbz if (qlimit < 1) 120195892Sbz return (EINVAL); 121195892Sbz return (netisr_setqlimit(&epair_nh, qlimit)); 122195892Sbz} 123195892SbzSYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 124195892Sbz 0, 0, sysctl_epair_netisr_maxqlen, "I", 125195892Sbz "Maximum if_epair(4) netisr \"hw\" queue length"); 126195892Sbz 127194927Sbzstruct epair_softc { 128195892Sbz struct ifnet *ifp; /* This ifp. */ 129195892Sbz struct ifnet *oifp; /* other ifp of pair. 
*/ 130195892Sbz u_int refcount; /* # of mbufs in flight. */ 131195892Sbz u_int cpuid; /* CPU ID assigned upon creation. */ 132194927Sbz void (*if_qflush)(struct ifnet *); 133195892Sbz /* Original if_qflush routine. */ 134194927Sbz}; 135194927Sbz 136195892Sbz/* 137195892Sbz * Per-CPU list of ifps with data in the ifq that needs to be flushed 138195892Sbz * to the netisr ``hw'' queue before we allow any further direct queuing 139195892Sbz * to the ``hw'' queue. 140195892Sbz */ 141194927Sbzstruct epair_ifp_drain { 142194927Sbz STAILQ_ENTRY(epair_ifp_drain) ifp_next; 143194927Sbz struct ifnet *ifp; 144194927Sbz}; 145195892SbzSTAILQ_HEAD(eid_list, epair_ifp_drain); 146194927Sbz 147195892Sbz#define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \ 148195892Sbz "if_epair", NULL, MTX_DEF) 149195892Sbz#define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx) 150195892Sbz#define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \ 151195892Sbz MA_OWNED) 152195892Sbz#define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx) 153195892Sbz#define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx) 154194927Sbz 155195892Sbz#ifdef INVARIANTS 156195892Sbz#define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v)) 157195892Sbz#define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r)) 158195892Sbz#define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r)) 159195892Sbz#define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p) 160195892Sbz#else 161195892Sbz#define EPAIR_REFCOUNT_INIT(r, v) 162195892Sbz#define EPAIR_REFCOUNT_AQUIRE(r) 163195892Sbz#define EPAIR_REFCOUNT_RELEASE(r) 164195892Sbz#define EPAIR_REFCOUNT_ASSERT(a, p) 165195892Sbz#endif 166194927Sbz 167194927Sbzstatic MALLOC_DEFINE(M_EPAIR, EPAIRNAME, 168194927Sbz "Pair of virtual cross-over connected Ethernet-like interfaces"); 169194927Sbz 170194927Sbzstatic struct if_clone epair_cloner = IFC_CLONE_INITIALIZER( 171194927Sbz EPAIRNAME, NULL, IF_MAXUNIT, 172194927Sbz NULL, epair_clone_match, epair_clone_create, 
epair_clone_destroy); 173194927Sbz 174195892Sbz/* 175195892Sbz * DPCPU area and functions. 176195892Sbz */ 177195892Sbzstruct epair_dpcpu { 178195892Sbz struct mtx if_epair_mtx; /* Per-CPU locking. */ 179195892Sbz int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */ 180195892Sbz struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with 181195892Sbz * data in the ifq. */ 182195892Sbz}; 183195892SbzDPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu); 184194927Sbz 185195892Sbzstatic void 186195892Sbzepair_dpcpu_init(void) 187195892Sbz{ 188195892Sbz struct epair_dpcpu *epair_dpcpu; 189195892Sbz struct eid_list *s; 190195892Sbz u_int cpuid; 191195892Sbz 192195892Sbz for (cpuid = 0; cpuid <= mp_maxid; cpuid++) { 193195892Sbz if (CPU_ABSENT(cpuid)) 194195892Sbz continue; 195195892Sbz 196195892Sbz epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 197195892Sbz 198195892Sbz /* Initialize per-cpu lock. */ 199195892Sbz EPAIR_LOCK_INIT(epair_dpcpu); 200195892Sbz 201195892Sbz /* Driver flags are per-cpu as are our netisr "hw" queues. */ 202195892Sbz epair_dpcpu->epair_drv_flags = 0; 203195892Sbz 204195892Sbz /* 205195892Sbz * Initialize per-cpu drain list. 206195892Sbz * Manually do what STAILQ_HEAD_INITIALIZER would do. 207195892Sbz */ 208195892Sbz s = &epair_dpcpu->epair_ifp_drain_list; 209195892Sbz s->stqh_first = NULL; 210195892Sbz s->stqh_last = &s->stqh_first; 211195892Sbz } 212195892Sbz} 213195892Sbz 214195892Sbzstatic void 215195892Sbzepair_dpcpu_detach(void) 216195892Sbz{ 217195892Sbz struct epair_dpcpu *epair_dpcpu; 218195892Sbz u_int cpuid; 219195892Sbz 220195892Sbz for (cpuid = 0; cpuid <= mp_maxid; cpuid++) { 221195892Sbz if (CPU_ABSENT(cpuid)) 222195892Sbz continue; 223195892Sbz 224195892Sbz epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 225195892Sbz 226195892Sbz /* Destroy per-cpu lock. */ 227195892Sbz EPAIR_LOCK_DESTROY(epair_dpcpu); 228195892Sbz } 229195892Sbz} 230195892Sbz 231194927Sbz/* 232195892Sbz * Helper functions. 
233195892Sbz */ 234195892Sbzstatic u_int 235195892Sbzcpuid_from_ifp(struct ifnet *ifp) 236195892Sbz{ 237195892Sbz struct epair_softc *sc; 238195892Sbz 239195892Sbz if (ifp == NULL) 240195892Sbz return (0); 241195892Sbz sc = ifp->if_softc; 242195892Sbz 243195892Sbz return (sc->cpuid); 244195892Sbz} 245195892Sbz 246195892Sbz/* 247194927Sbz * Netisr handler functions. 248194927Sbz */ 249194927Sbzstatic void 250195892Sbzepair_nh_sintr(struct mbuf *m) 251194927Sbz{ 252194927Sbz struct ifnet *ifp; 253194927Sbz struct epair_softc *sc; 254194927Sbz 255194927Sbz ifp = m->m_pkthdr.rcvif; 256194927Sbz (*ifp->if_input)(ifp, m); 257194927Sbz sc = ifp->if_softc; 258195892Sbz EPAIR_REFCOUNT_RELEASE(&sc->refcount); 259194927Sbz DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount); 260194927Sbz} 261194927Sbz 262195892Sbzstatic struct mbuf * 263195892Sbzepair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 264195892Sbz{ 265195892Sbz 266195892Sbz *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif); 267195892Sbz 268195892Sbz return (m); 269195892Sbz} 270195892Sbz 271194927Sbzstatic void 272195892Sbzepair_nh_drainedcpu(u_int cpuid) 273194927Sbz{ 274195892Sbz struct epair_dpcpu *epair_dpcpu; 275194927Sbz struct epair_ifp_drain *elm, *tvar; 276194927Sbz struct ifnet *ifp; 277194927Sbz 278195892Sbz epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 279195892Sbz EPAIR_LOCK(epair_dpcpu); 280194927Sbz /* 281194927Sbz * Assume our "hw" queue and possibly ifq will be emptied 282194927Sbz * again. In case we will overflow the "hw" queue while 283194927Sbz * draining, epair_start_locked will set IFF_DRV_OACTIVE 284194927Sbz * again and we will stop and return. 
285194927Sbz */ 286195892Sbz STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 287195892Sbz ifp_next, tvar) { 288194927Sbz ifp = elm->ifp; 289195892Sbz epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE; 290194927Sbz ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 291194927Sbz epair_start_locked(ifp); 292194927Sbz 293194927Sbz IFQ_LOCK(&ifp->if_snd); 294194927Sbz if (IFQ_IS_EMPTY(&ifp->if_snd)) { 295195892Sbz STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list, 296195892Sbz elm, epair_ifp_drain, ifp_next); 297194927Sbz free(elm, M_EPAIR); 298194927Sbz } 299194927Sbz IFQ_UNLOCK(&ifp->if_snd); 300194927Sbz 301194927Sbz if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) { 302194927Sbz /* Our "hw"q overflew again. */ 303195892Sbz epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE 304194927Sbz DPRINTF("hw queue length overflow at %u\n", 305195892Sbz epair_nh.nh_qlimit); 306194927Sbz break; 307194927Sbz } 308194927Sbz } 309195892Sbz EPAIR_UNLOCK(epair_dpcpu); 310194927Sbz} 311194927Sbz 312194927Sbz/* 313194927Sbz * Network interface (`if') related functions. 314194927Sbz */ 315195892Sbzstatic int 316195892Sbzepair_add_ifp_for_draining(struct ifnet *ifp) 317195892Sbz{ 318195892Sbz struct epair_dpcpu *epair_dpcpu; 319195892Sbz struct epair_softc *sc = sc = ifp->if_softc; 320195892Sbz struct epair_ifp_drain *elm = NULL; 321195892Sbz 322195892Sbz epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 323195892Sbz STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next) 324195892Sbz if (elm->ifp == ifp) 325195892Sbz break; 326195892Sbz /* If the ipf is there already, return success. 
*/ 327195892Sbz if (elm != NULL) 328195892Sbz return (0); 329195892Sbz 330195892Sbz elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO); 331195892Sbz if (elm == NULL) 332195892Sbz return (ENOMEM); 333195892Sbz 334195892Sbz elm->ifp = ifp; 335195892Sbz STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next); 336195892Sbz 337195892Sbz return (0); 338195892Sbz} 339195892Sbz 340194927Sbzstatic void 341194927Sbzepair_start_locked(struct ifnet *ifp) 342194927Sbz{ 343195892Sbz struct epair_dpcpu *epair_dpcpu; 344194927Sbz struct mbuf *m; 345194927Sbz struct epair_softc *sc; 346194927Sbz struct ifnet *oifp; 347194927Sbz int error; 348194927Sbz 349194927Sbz DPRINTF("ifp=%p\n", ifp); 350195892Sbz sc = ifp->if_softc; 351195892Sbz epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 352195892Sbz EPAIR_LOCK_ASSERT(epair_dpcpu); 353194927Sbz 354194927Sbz if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 355194927Sbz return; 356194927Sbz if ((ifp->if_flags & IFF_UP) == 0) 357194927Sbz return; 358194927Sbz 359194927Sbz /* 360194927Sbz * We get patckets here from ether_output via if_handoff() 361194927Sbz * and ned to put them into the input queue of the oifp 362194927Sbz * and call oifp->if_input() via netisr/epair_sintr(). 363194927Sbz */ 364194927Sbz oifp = sc->oifp; 365194927Sbz sc = oifp->if_softc; 366194927Sbz for (;;) { 367194927Sbz IFQ_DEQUEUE(&ifp->if_snd, m); 368194927Sbz if (m == NULL) 369194927Sbz break; 370194927Sbz BPF_MTAP(ifp, m); 371194927Sbz 372194927Sbz /* 373194927Sbz * In case the outgoing interface is not usable, 374194927Sbz * drop the packet. 
375194927Sbz */ 376194927Sbz if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 377194927Sbz (oifp->if_flags & IFF_UP) ==0) { 378194927Sbz ifp->if_oerrors++; 379194927Sbz m_freem(m); 380194927Sbz continue; 381194927Sbz } 382194927Sbz DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 383194927Sbz 384194927Sbz /* 385194927Sbz * Add a reference so the interface cannot go while the 386194927Sbz * packet is in transit as we rely on rcvif to stay valid. 387194927Sbz */ 388195892Sbz EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 389194927Sbz m->m_pkthdr.rcvif = oifp; 390194927Sbz CURVNET_SET_QUIET(oifp->if_vnet); 391194927Sbz error = netisr_queue(NETISR_EPAIR, m); 392194927Sbz CURVNET_RESTORE(); 393194927Sbz if (!error) { 394194927Sbz ifp->if_opackets++; 395194927Sbz /* Someone else received the packet. */ 396194927Sbz oifp->if_ipackets++; 397194927Sbz } else { 398195892Sbz epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 399194927Sbz ifp->if_drv_flags |= IFF_DRV_OACTIVE; 400195892Sbz if (epair_add_ifp_for_draining(ifp)) { 401195892Sbz ifp->if_oerrors++; 402195892Sbz m_freem(m); 403195892Sbz } 404195892Sbz EPAIR_REFCOUNT_RELEASE(&sc->refcount); 405194927Sbz } 406194927Sbz } 407194927Sbz} 408194927Sbz 409194927Sbzstatic void 410194927Sbzepair_start(struct ifnet *ifp) 411194927Sbz{ 412195892Sbz struct epair_dpcpu *epair_dpcpu; 413194927Sbz 414195892Sbz epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 415195892Sbz EPAIR_LOCK(epair_dpcpu); 416194927Sbz epair_start_locked(ifp); 417195892Sbz EPAIR_UNLOCK(epair_dpcpu); 418194927Sbz} 419194927Sbz 420194927Sbzstatic int 421194927Sbzepair_transmit_locked(struct ifnet *ifp, struct mbuf *m) 422194927Sbz{ 423195892Sbz struct epair_dpcpu *epair_dpcpu; 424194927Sbz struct epair_softc *sc; 425194927Sbz struct ifnet *oifp; 426194927Sbz int error, len; 427194927Sbz short mflags; 428194927Sbz 429194927Sbz DPRINTF("ifp=%p m=%p\n", ifp, m); 430195892Sbz sc = ifp->if_softc; 431195892Sbz epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, 
epair_dpcpu); 432195892Sbz EPAIR_LOCK_ASSERT(epair_dpcpu); 433194927Sbz 434194927Sbz if (m == NULL) 435194927Sbz return (0); 436194927Sbz 437194927Sbz /* 438194927Sbz * We are not going to use the interface en/dequeue mechanism 439194927Sbz * on the TX side. We are called from ether_output_frame() 440194927Sbz * and will put the packet into the incoming queue of the 441194927Sbz * other interface of our pair via the netsir. 442194927Sbz */ 443194927Sbz if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 444194927Sbz m_freem(m); 445194927Sbz return (ENXIO); 446194927Sbz } 447194927Sbz if ((ifp->if_flags & IFF_UP) == 0) { 448194927Sbz m_freem(m); 449194927Sbz return (ENETDOWN); 450194927Sbz } 451194927Sbz 452194927Sbz BPF_MTAP(ifp, m); 453194927Sbz 454194927Sbz /* 455194927Sbz * In case the outgoing interface is not usable, 456194927Sbz * drop the packet. 457194927Sbz */ 458194927Sbz oifp = sc->oifp; 459194927Sbz if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 460194927Sbz (oifp->if_flags & IFF_UP) ==0) { 461194927Sbz ifp->if_oerrors++; 462194927Sbz m_freem(m); 463194927Sbz return (0); 464194927Sbz } 465194927Sbz len = m->m_pkthdr.len; 466194927Sbz mflags = m->m_flags; 467194927Sbz DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 468194927Sbz 469194927Sbz#ifdef ALTQ 470194927Sbz /* Support ALTQ via the clasic if_start() path. 
*/ 471194927Sbz IF_LOCK(&ifp->if_snd); 472194927Sbz if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 473194927Sbz ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 474194927Sbz if (error) 475194927Sbz ifp->if_snd.ifq_drops++; 476194927Sbz IF_UNLOCK(&ifp->if_snd); 477194927Sbz if (!error) { 478194927Sbz ifp->if_obytes += len; 479194927Sbz if (mflags & (M_BCAST|M_MCAST)) 480194927Sbz ifp->if_omcasts++; 481194927Sbz 482194927Sbz if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) 483194927Sbz epair_start_locked(ifp); 484194927Sbz else 485195892Sbz (void)epair_add_ifp_for_draining(ifp); 486194927Sbz } 487194927Sbz return (error); 488194927Sbz } 489194927Sbz IF_UNLOCK(&ifp->if_snd); 490194927Sbz#endif 491194927Sbz 492195892Sbz if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) { 493194927Sbz /* 494194927Sbz * Our hardware queue is full, try to fall back 495194927Sbz * queuing to the ifq but do not call ifp->if_start. 496194927Sbz * Either we are lucky or the packet is gone. 497194927Sbz */ 498194927Sbz IFQ_ENQUEUE(&ifp->if_snd, m, error); 499194927Sbz if (!error) 500195892Sbz (void)epair_add_ifp_for_draining(ifp); 501194927Sbz return (error); 502194927Sbz } 503194927Sbz sc = oifp->if_softc; 504194927Sbz /* 505194927Sbz * Add a reference so the interface cannot go while the 506194927Sbz * packet is in transit as we rely on rcvif to stay valid. 507194927Sbz */ 508195892Sbz EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 509194927Sbz m->m_pkthdr.rcvif = oifp; 510194927Sbz CURVNET_SET_QUIET(oifp->if_vnet); 511194927Sbz error = netisr_queue(NETISR_EPAIR, m); 512194927Sbz CURVNET_RESTORE(); 513194927Sbz if (!error) { 514194927Sbz ifp->if_opackets++; 515194927Sbz /* 516194927Sbz * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, 517194927Sbz * but as we bypass all this we have to duplicate 518194927Sbz * the logic another time. 
519194927Sbz */ 520194927Sbz ifp->if_obytes += len; 521194927Sbz if (mflags & (M_BCAST|M_MCAST)) 522194927Sbz ifp->if_omcasts++; 523194927Sbz /* Someone else received the packet. */ 524194927Sbz oifp->if_ipackets++; 525194927Sbz } else { 526194927Sbz /* The packet was freed already. */ 527195892Sbz EPAIR_REFCOUNT_RELEASE(&sc->refcount); 528195892Sbz epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 529194927Sbz ifp->if_drv_flags |= IFF_DRV_OACTIVE; 530194927Sbz } 531194927Sbz 532194927Sbz return (error); 533194927Sbz} 534194927Sbz 535194927Sbzstatic int 536194927Sbzepair_transmit(struct ifnet *ifp, struct mbuf *m) 537194927Sbz{ 538195892Sbz struct epair_dpcpu *epair_dpcpu; 539194927Sbz int error; 540194927Sbz 541195892Sbz epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 542195892Sbz EPAIR_LOCK(epair_dpcpu); 543194927Sbz error = epair_transmit_locked(ifp, m); 544195892Sbz EPAIR_UNLOCK(epair_dpcpu); 545194927Sbz return (error); 546194927Sbz} 547194927Sbz 548194927Sbzstatic void 549194927Sbzepair_qflush(struct ifnet *ifp) 550194927Sbz{ 551195892Sbz struct epair_dpcpu *epair_dpcpu; 552194927Sbz struct epair_softc *sc; 553194927Sbz struct ifaltq *ifq; 554194927Sbz 555194927Sbz sc = ifp->if_softc; 556195892Sbz epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 557195892Sbz EPAIR_LOCK(epair_dpcpu); 558194927Sbz ifq = &ifp->if_snd; 559194927Sbz DPRINTF("ifp=%p sc refcnt=%u ifq_len=%u\n", 560194927Sbz ifp, sc->refcount, ifq->ifq_len); 561194927Sbz /* 562195892Sbz * Instead of calling EPAIR_REFCOUNT_RELEASE(&sc->refcount); 563194927Sbz * n times, just subtract for the cleanup. 
564194927Sbz */ 565194927Sbz sc->refcount -= ifq->ifq_len; 566195892Sbz EPAIR_UNLOCK(epair_dpcpu); 567194927Sbz if (sc->if_qflush) 568194927Sbz sc->if_qflush(ifp); 569194927Sbz} 570194927Sbz 571194927Sbzstatic int 572194927Sbzepair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 573194927Sbz{ 574194927Sbz struct ifreq *ifr; 575194927Sbz int error; 576194927Sbz 577194927Sbz ifr = (struct ifreq *)data; 578194927Sbz switch (cmd) { 579194927Sbz case SIOCSIFFLAGS: 580194927Sbz case SIOCADDMULTI: 581194927Sbz case SIOCDELMULTI: 582194927Sbz error = 0; 583194927Sbz break; 584194927Sbz 585195892Sbz case SIOCSIFMTU: 586195892Sbz /* We basically allow all kinds of MTUs. */ 587195892Sbz ifp->if_mtu = ifr->ifr_mtu; 588195892Sbz error = 0; 589195892Sbz break; 590195892Sbz 591194927Sbz default: 592194927Sbz /* Let the common ethernet handler process this. */ 593194927Sbz error = ether_ioctl(ifp, cmd, data); 594194927Sbz break; 595194927Sbz } 596194927Sbz 597194927Sbz return (error); 598194927Sbz} 599194927Sbz 600194927Sbzstatic void 601194927Sbzepair_init(void *dummy __unused) 602194927Sbz{ 603194927Sbz} 604194927Sbz 605194927Sbz 606194927Sbz/* 607194927Sbz * Interface cloning functions. 608194927Sbz * We use our private ones so that we can create/destroy our secondary 609194927Sbz * device along with the primary one. 610194927Sbz */ 611194927Sbzstatic int 612194927Sbzepair_clone_match(struct if_clone *ifc, const char *name) 613194927Sbz{ 614194927Sbz const char *cp; 615194927Sbz 616194927Sbz DPRINTF("name='%s'\n", name); 617194927Sbz 618194927Sbz /* 619194927Sbz * Our base name is epair. 620194927Sbz * Our interfaces will be named epair<n>[ab]. 621194927Sbz * So accept anything of the following list: 622194927Sbz * - epair 623194927Sbz * - epair<n> 624194927Sbz * but not the epair<n>[ab] versions. 
625194927Sbz */ 626194927Sbz if (strncmp(EPAIRNAME, name, sizeof(EPAIRNAME)-1) != 0) 627194927Sbz return (0); 628194927Sbz 629194927Sbz for (cp = name + sizeof(EPAIRNAME) - 1; *cp != '\0'; cp++) { 630194927Sbz if (*cp < '0' || *cp > '9') 631194927Sbz return (0); 632194927Sbz } 633194927Sbz 634194927Sbz return (1); 635194927Sbz} 636194927Sbz 637194927Sbzstatic int 638194927Sbzepair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 639194927Sbz{ 640194927Sbz struct epair_softc *sca, *scb; 641194927Sbz struct ifnet *ifp; 642194927Sbz char *dp; 643194927Sbz int error, unit, wildcard; 644194927Sbz uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 645194927Sbz 646194927Sbz /* 647194927Sbz * We are abusing params to create our second interface. 648194927Sbz * Actually we already created it and called if_clone_createif() 649194927Sbz * for it to do the official insertion procedure the moment we knew 650194927Sbz * it cannot fail anymore. So just do attach it here. 651194927Sbz */ 652194927Sbz if (params) { 653194927Sbz scb = (struct epair_softc *)params; 654194927Sbz ifp = scb->ifp; 655194927Sbz /* Assign a hopefully unique, locally administered etheraddr. */ 656194927Sbz eaddr[0] = 0x02; 657194927Sbz eaddr[3] = (ifp->if_index >> 8) & 0xff; 658194927Sbz eaddr[4] = ifp->if_index & 0xff; 659194927Sbz eaddr[5] = 0x0b; 660194927Sbz ether_ifattach(ifp, eaddr); 661194927Sbz /* Correctly set the name for the cloner list. */ 662194927Sbz strlcpy(name, scb->ifp->if_xname, len); 663194927Sbz return (0); 664194927Sbz } 665194927Sbz 666194927Sbz /* Try to see if a special unit was requested. */ 667194927Sbz error = ifc_name2unit(name, &unit); 668194927Sbz if (error != 0) 669194927Sbz return (error); 670194927Sbz wildcard = (unit < 0); 671194927Sbz 672194927Sbz error = ifc_alloc_unit(ifc, &unit); 673194927Sbz if (error != 0) 674194927Sbz return (error); 675194927Sbz 676194927Sbz /* 677194927Sbz * If no unit had been given, we need to adjust the ifName. 
678194927Sbz * Also make sure there is space for our extra [ab] suffix. 679194927Sbz */ 680194927Sbz for (dp = name; *dp != '\0'; dp++); 681194927Sbz if (wildcard) { 682194927Sbz error = snprintf(dp, len - (dp - name), "%d", unit); 683194927Sbz if (error > len - (dp - name) - 1) { 684194927Sbz /* ifName too long. */ 685194927Sbz ifc_free_unit(ifc, unit); 686194927Sbz return (ENOSPC); 687194927Sbz } 688194927Sbz dp += error; 689194927Sbz } 690194927Sbz if (len - (dp - name) - 1 < 1) { 691194927Sbz /* No space left for our [ab] suffix. */ 692194927Sbz ifc_free_unit(ifc, unit); 693194927Sbz return (ENOSPC); 694194927Sbz } 695194927Sbz *dp = 'a'; 696194927Sbz /* Must not change dp so we can replace 'a' by 'b' later. */ 697194927Sbz *(dp+1) = '\0'; 698194927Sbz 699194927Sbz /* Allocate memory for both [ab] interfaces */ 700194927Sbz sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 701195892Sbz EPAIR_REFCOUNT_INIT(&sca->refcount, 1); 702194927Sbz sca->ifp = if_alloc(IFT_ETHER); 703194927Sbz if (sca->ifp == NULL) { 704194927Sbz free(sca, M_EPAIR); 705194927Sbz ifc_free_unit(ifc, unit); 706194927Sbz return (ENOSPC); 707194927Sbz } 708194927Sbz 709194927Sbz scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 710195892Sbz EPAIR_REFCOUNT_INIT(&scb->refcount, 1); 711194927Sbz scb->ifp = if_alloc(IFT_ETHER); 712194927Sbz if (scb->ifp == NULL) { 713194927Sbz free(scb, M_EPAIR); 714194927Sbz if_free(sca->ifp); 715194927Sbz free(sca, M_EPAIR); 716194927Sbz ifc_free_unit(ifc, unit); 717194927Sbz return (ENOSPC); 718194927Sbz } 719194927Sbz 720194927Sbz /* 721194927Sbz * Cross-reference the interfaces so we will be able to free both. 722194927Sbz */ 723194927Sbz sca->oifp = scb->ifp; 724194927Sbz scb->oifp = sca->ifp; 725195892Sbz 726195892Sbz /* 727195892Sbz * Calculate the cpuid for netisr queueing based on the 728195892Sbz * ifIndex of the interfaces. 
As long as we cannot configure 729195892Sbz * this or use cpuset information easily we cannot guarantee 730195892Sbz * cache locality but we can at least allow parallelism. 731195892Sbz */ 732195892Sbz sca->cpuid = 733195892Sbz netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount()); 734195892Sbz scb->cpuid = 735195892Sbz netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount()); 736194927Sbz 737194927Sbz /* Finish initialization of interface <n>a. */ 738194927Sbz ifp = sca->ifp; 739194927Sbz ifp->if_softc = sca; 740194927Sbz strlcpy(ifp->if_xname, name, IFNAMSIZ); 741194927Sbz ifp->if_dname = ifc->ifc_name; 742194927Sbz ifp->if_dunit = unit; 743194927Sbz ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 744194927Sbz ifp->if_start = epair_start; 745194927Sbz ifp->if_ioctl = epair_ioctl; 746194927Sbz ifp->if_init = epair_init; 747194927Sbz ifp->if_snd.ifq_maxlen = ifqmaxlen; 748194927Sbz /* Assign a hopefully unique, locally administered etheraddr. */ 749194927Sbz eaddr[0] = 0x02; 750194927Sbz eaddr[3] = (ifp->if_index >> 8) & 0xff; 751194927Sbz eaddr[4] = ifp->if_index & 0xff; 752194927Sbz eaddr[5] = 0x0a; 753194927Sbz ether_ifattach(ifp, eaddr); 754194927Sbz sca->if_qflush = ifp->if_qflush; 755194927Sbz ifp->if_qflush = epair_qflush; 756194927Sbz ifp->if_transmit = epair_transmit; 757194927Sbz ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */ 758194927Sbz 759194927Sbz /* Swap the name and finish initialization of interface <n>b. 
*/ 760194927Sbz *dp = 'b'; 761194927Sbz 762194927Sbz ifp = scb->ifp; 763194927Sbz ifp->if_softc = scb; 764194927Sbz strlcpy(ifp->if_xname, name, IFNAMSIZ); 765194927Sbz ifp->if_dname = ifc->ifc_name; 766194927Sbz ifp->if_dunit = unit; 767194927Sbz ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 768194927Sbz ifp->if_start = epair_start; 769194927Sbz ifp->if_ioctl = epair_ioctl; 770194927Sbz ifp->if_init = epair_init; 771194927Sbz ifp->if_snd.ifq_maxlen = ifqmaxlen; 772194927Sbz /* We need to play some tricks here for the second interface. */ 773194927Sbz strlcpy(name, EPAIRNAME, len); 774194927Sbz error = if_clone_create(name, len, (caddr_t)scb); 775194927Sbz if (error) 776194927Sbz panic("%s: if_clone_createif() for our 2nd iface failed: %d", 777194927Sbz __func__, error); 778194927Sbz scb->if_qflush = ifp->if_qflush; 779194927Sbz ifp->if_qflush = epair_qflush; 780194927Sbz ifp->if_transmit = epair_transmit; 781194927Sbz ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */ 782194927Sbz 783194927Sbz /* 784194927Sbz * Restore name to <n>a as the ifp for this will go into the 785194927Sbz * cloner list for the initial call. 786194927Sbz */ 787194927Sbz strlcpy(name, sca->ifp->if_xname, len); 788194927Sbz DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb); 789194927Sbz 790194927Sbz /* Tell the world, that we are ready to rock. 
*/ 791194927Sbz sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; 792194927Sbz scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; 793194927Sbz 794194927Sbz return (0); 795194927Sbz} 796194927Sbz 797194927Sbzstatic int 798194927Sbzepair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) 799194927Sbz{ 800194927Sbz struct ifnet *oifp; 801194927Sbz struct epair_softc *sca, *scb; 802194927Sbz int unit, error; 803194927Sbz 804194927Sbz DPRINTF("ifp=%p\n", ifp); 805194927Sbz 806194927Sbz /* 807194927Sbz * In case we called into if_clone_destroyif() ourselves 808194927Sbz * again to remove the second interface, the softc will be 809194927Sbz * NULL. In that case so not do anything but return success. 810194927Sbz */ 811194927Sbz if (ifp->if_softc == NULL) 812194927Sbz return (0); 813194927Sbz 814194927Sbz unit = ifp->if_dunit; 815194927Sbz sca = ifp->if_softc; 816194927Sbz oifp = sca->oifp; 817194927Sbz scb = oifp->if_softc; 818194927Sbz 819194927Sbz DPRINTF("ifp=%p oifp=%p\n", ifp, oifp); 820194927Sbz ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 821194927Sbz oifp->if_drv_flags &= ~IFF_DRV_RUNNING; 822194927Sbz ether_ifdetach(oifp); 823194927Sbz ether_ifdetach(ifp); 824194927Sbz /* 825194927Sbz * Wait for all packets to be dispatched to if_input. 826194927Sbz * The numbers can only go down as the interfaces are 827194927Sbz * detached so there is no need to use atomics. 828194927Sbz */ 829194927Sbz DPRINTF("sca refcnt=%u scb refcnt=%u\n", sca->refcount, scb->refcount); 830195892Sbz EPAIR_REFCOUNT_ASSERT(sca->refcount == 1 && scb->refcount == 1, 831194927Sbz ("%s: sca->refcount!=1: %d || scb->refcount!=1: %d", 832194927Sbz __func__, sca->refcount, scb->refcount)); 833194927Sbz 834194927Sbz /* 835194927Sbz * Get rid of our second half. 
836194927Sbz */ 837194927Sbz oifp->if_softc = NULL; 838194927Sbz error = if_clone_destroyif(ifc, oifp); 839194927Sbz if (error) 840194927Sbz panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 841194927Sbz __func__, error); 842194927Sbz 843195892Sbz /* 844195892Sbz * Finish cleaning up. Free them and release the unit. 845195892Sbz * As the other of the two interfaces my reside in a different vnet, 846195892Sbz * we need to switch before freeing them. 847195892Sbz */ 848195892Sbz CURVNET_SET_QUIET(oifp->if_vnet); 849194927Sbz if_free_type(oifp, IFT_ETHER); 850195892Sbz CURVNET_RESTORE(); 851194927Sbz if_free_type(ifp, IFT_ETHER); 852194927Sbz free(scb, M_EPAIR); 853194927Sbz free(sca, M_EPAIR); 854194927Sbz ifc_free_unit(ifc, unit); 855194927Sbz 856194927Sbz return (0); 857194927Sbz} 858194927Sbz 859194927Sbzstatic int 860194927Sbzepair_modevent(module_t mod, int type, void *data) 861194927Sbz{ 862195892Sbz int qlimit; 863194927Sbz 864194927Sbz switch (type) { 865194927Sbz case MOD_LOAD: 866194927Sbz /* For now limit us to one global mutex and one inq. */ 867195892Sbz epair_dpcpu_init(); 868195892Sbz epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. 
*/ 869195892Sbz if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit)) 870195892Sbz epair_nh.nh_qlimit = qlimit; 871195892Sbz netisr_register(&epair_nh); 872194927Sbz if_clone_attach(&epair_cloner); 873194927Sbz if (bootverbose) 874194927Sbz printf("%s initialized.\n", EPAIRNAME); 875194927Sbz break; 876194927Sbz case MOD_UNLOAD: 877194927Sbz if_clone_detach(&epair_cloner); 878195892Sbz netisr_unregister(&epair_nh); 879195892Sbz epair_dpcpu_detach(); 880194927Sbz if (bootverbose) 881194927Sbz printf("%s unloaded.\n", EPAIRNAME); 882194927Sbz break; 883194927Sbz default: 884194927Sbz return (EOPNOTSUPP); 885194927Sbz } 886194927Sbz return (0); 887194927Sbz} 888194927Sbz 889194927Sbzstatic moduledata_t epair_mod = { 890194927Sbz "if_epair", 891194927Sbz epair_modevent, 892194927Sbz 0 893194927Sbz}; 894194927Sbz 895194927SbzDECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 896194927SbzMODULE_VERSION(if_epair, 1); 897