if_epair.c revision 287594
1/*- 2 * Copyright (c) 2008 The FreeBSD Foundation 3 * Copyright (c) 2009-2010 Bjoern A. Zeeb <bz@FreeBSD.org> 4 * All rights reserved. 5 * 6 * This software was developed by CK Software GmbH under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31/* 32 * A pair of virtual back-to-back connected ethernet like interfaces 33 * (``two interfaces with a virtual cross-over cable''). 34 * 35 * This is mostly intended to be used to provide connectivity between 36 * different virtual network stack instances. 37 */ 38/* 39 * Things to re-think once we have more experience: 40 * - ifp->if_reassign function once we can test with vimage. Depending on 41 * how if_vmove() is going to be improved. 42 * - Real random etheraddrs that are checked to be uniquish; we would need 43 * to re-do them in case we move the interface between network stacks 44 * in a private if_reassign function. 45 * In case we bridge to a real interface/network or between indepedent 46 * epairs on multiple stacks/machines, we may need this. 47 * For now let the user handle that case. 48 */ 49 50#include <sys/cdefs.h> 51__FBSDID("$FreeBSD: stable/10/sys/net/if_epair.c 287594 2015-09-09 08:52:39Z hrs $"); 52 53#include <sys/param.h> 54#include <sys/kernel.h> 55#include <sys/mbuf.h> 56#include <sys/module.h> 57#include <sys/refcount.h> 58#include <sys/queue.h> 59#include <sys/smp.h> 60#include <sys/socket.h> 61#include <sys/sockio.h> 62#include <sys/sysctl.h> 63#include <sys/types.h> 64 65#include <net/bpf.h> 66#include <net/ethernet.h> 67#include <net/if.h> 68#include <net/if_clone.h> 69#include <net/if_media.h> 70#include <net/if_var.h> 71#include <net/if_types.h> 72#include <net/netisr.h> 73#include <net/vnet.h> 74 75SYSCTL_DECL(_net_link); 76static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); 77 78#ifdef EPAIR_DEBUG 79static int epair_debug = 0; 80SYSCTL_INT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW, 81 &epair_debug, 0, "if_epair(4) debugging."); 82#define DPRINTF(fmt, arg...) \ 83 if (epair_debug) \ 84 printf("[%s:%d] " fmt, __func__, __LINE__, ##arg) 85#else 86#define DPRINTF(fmt, arg...) 87#endif 88 89static void epair_nh_sintr(struct mbuf *); 90static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *); 91static void epair_nh_drainedcpu(u_int); 92 93static void epair_start_locked(struct ifnet *); 94static int epair_media_change(struct ifnet *); 95static void epair_media_status(struct ifnet *, struct ifmediareq *); 96 97static int epair_clone_match(struct if_clone *, const char *); 98static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); 99static int epair_clone_destroy(struct if_clone *, struct ifnet *); 100 101static const char epairname[] = "epair"; 102 103/* Netisr related definitions and sysctl. */ 104static struct netisr_handler epair_nh = { 105 .nh_name = epairname, 106 .nh_proto = NETISR_EPAIR, 107 .nh_policy = NETISR_POLICY_CPU, 108 .nh_handler = epair_nh_sintr, 109 .nh_m2cpuid = epair_nh_m2cpuid, 110 .nh_drainedcpu = epair_nh_drainedcpu, 111}; 112 113static int 114sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS) 115{ 116 int error, qlimit; 117 118 netisr_getqlimit(&epair_nh, &qlimit); 119 error = sysctl_handle_int(oidp, &qlimit, 0, req); 120 if (error || !req->newptr) 121 return (error); 122 if (qlimit < 1) 123 return (EINVAL); 124 return (netisr_setqlimit(&epair_nh, qlimit)); 125} 126SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 127 0, 0, sysctl_epair_netisr_maxqlen, "I", 128 "Maximum if_epair(4) netisr \"hw\" queue length"); 129 130struct epair_softc { 131 struct ifnet *ifp; /* This ifp. */ 132 struct ifnet *oifp; /* other ifp of pair. */ 133 struct ifmedia media; /* Media config (fake). */ 134 u_int refcount; /* # of mbufs in flight. */ 135 u_int cpuid; /* CPU ID assigned upon creation. */ 136 void (*if_qflush)(struct ifnet *); 137 /* Original if_qflush routine. */ 138}; 139 140/* 141 * Per-CPU list of ifps with data in the ifq that needs to be flushed 142 * to the netisr ``hw'' queue before we allow any further direct queuing 143 * to the ``hw'' queue. 144 */ 145struct epair_ifp_drain { 146 STAILQ_ENTRY(epair_ifp_drain) ifp_next; 147 struct ifnet *ifp; 148}; 149STAILQ_HEAD(eid_list, epair_ifp_drain); 150 151#define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \ 152 "if_epair", NULL, MTX_DEF) 153#define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx) 154#define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \ 155 MA_OWNED) 156#define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx) 157#define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx) 158 159#ifdef INVARIANTS 160#define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v)) 161#define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r)) 162#define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r)) 163#define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p) 164#else 165#define EPAIR_REFCOUNT_INIT(r, v) 166#define EPAIR_REFCOUNT_AQUIRE(r) 167#define EPAIR_REFCOUNT_RELEASE(r) 168#define EPAIR_REFCOUNT_ASSERT(a, p) 169#endif 170 171static MALLOC_DEFINE(M_EPAIR, epairname, 172 "Pair of virtual cross-over connected Ethernet-like interfaces"); 173 174static VNET_DEFINE(struct if_clone *, epair_cloner); 175#define V_epair_cloner VNET(epair_cloner) 176 177/* 178 * DPCPU area and functions. 179 */ 180struct epair_dpcpu { 181 struct mtx if_epair_mtx; /* Per-CPU locking. */ 182 int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */ 183 struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with 184 * data in the ifq. */ 185}; 186DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu); 187 188static void 189epair_dpcpu_init(void) 190{ 191 struct epair_dpcpu *epair_dpcpu; 192 struct eid_list *s; 193 u_int cpuid; 194 195 CPU_FOREACH(cpuid) { 196 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 197 198 /* Initialize per-cpu lock. */ 199 EPAIR_LOCK_INIT(epair_dpcpu); 200 201 /* Driver flags are per-cpu as are our netisr "hw" queues. */ 202 epair_dpcpu->epair_drv_flags = 0; 203 204 /* 205 * Initialize per-cpu drain list. 206 * Manually do what STAILQ_HEAD_INITIALIZER would do. 207 */ 208 s = &epair_dpcpu->epair_ifp_drain_list; 209 s->stqh_first = NULL; 210 s->stqh_last = &s->stqh_first; 211 } 212} 213 214static void 215epair_dpcpu_detach(void) 216{ 217 struct epair_dpcpu *epair_dpcpu; 218 u_int cpuid; 219 220 CPU_FOREACH(cpuid) { 221 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 222 223 /* Destroy per-cpu lock. */ 224 EPAIR_LOCK_DESTROY(epair_dpcpu); 225 } 226} 227 228/* 229 * Helper functions. 230 */ 231static u_int 232cpuid_from_ifp(struct ifnet *ifp) 233{ 234 struct epair_softc *sc; 235 236 if (ifp == NULL) 237 return (0); 238 sc = ifp->if_softc; 239 240 return (sc->cpuid); 241} 242 243/* 244 * Netisr handler functions. 245 */ 246static void 247epair_nh_sintr(struct mbuf *m) 248{ 249 struct ifnet *ifp; 250 struct epair_softc *sc; 251 252 ifp = m->m_pkthdr.rcvif; 253 (*ifp->if_input)(ifp, m); 254 sc = ifp->if_softc; 255 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 256 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 257 ("%s: ifp=%p sc->refcount not >= 1: %d", 258 __func__, ifp, sc->refcount)); 259 DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount); 260} 261 262static struct mbuf * 263epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 264{ 265 266 *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif); 267 268 return (m); 269} 270 271static void 272epair_nh_drainedcpu(u_int cpuid) 273{ 274 struct epair_dpcpu *epair_dpcpu; 275 struct epair_ifp_drain *elm, *tvar; 276 struct ifnet *ifp; 277 278 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 279 EPAIR_LOCK(epair_dpcpu); 280 /* 281 * Assume our "hw" queue and possibly ifq will be emptied 282 * again. In case we will overflow the "hw" queue while 283 * draining, epair_start_locked will set IFF_DRV_OACTIVE 284 * again and we will stop and return. 285 */ 286 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 287 ifp_next, tvar) { 288 ifp = elm->ifp; 289 epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE; 290 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 291 epair_start_locked(ifp); 292 293 IFQ_LOCK(&ifp->if_snd); 294 if (IFQ_IS_EMPTY(&ifp->if_snd)) { 295 struct epair_softc *sc; 296 297 STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list, 298 elm, epair_ifp_drain, ifp_next); 299 /* The cached ifp goes off the list. */ 300 sc = ifp->if_softc; 301 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 302 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 303 ("%s: ifp=%p sc->refcount not >= 1: %d", 304 __func__, ifp, sc->refcount)); 305 free(elm, M_EPAIR); 306 } 307 IFQ_UNLOCK(&ifp->if_snd); 308 309 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) { 310 /* Our "hw"q overflew again. */ 311 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 312 DPRINTF("hw queue length overflow at %u\n", 313 epair_nh.nh_qlimit); 314 break; 315 } 316 } 317 EPAIR_UNLOCK(epair_dpcpu); 318} 319 320/* 321 * Network interface (`if') related functions. 322 */ 323static void 324epair_remove_ifp_from_draining(struct ifnet *ifp) 325{ 326 struct epair_dpcpu *epair_dpcpu; 327 struct epair_ifp_drain *elm, *tvar; 328 u_int cpuid; 329 330 CPU_FOREACH(cpuid) { 331 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 332 EPAIR_LOCK(epair_dpcpu); 333 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 334 ifp_next, tvar) { 335 if (ifp == elm->ifp) { 336 struct epair_softc *sc; 337 338 STAILQ_REMOVE( 339 &epair_dpcpu->epair_ifp_drain_list, elm, 340 epair_ifp_drain, ifp_next); 341 /* The cached ifp goes off the list. */ 342 sc = ifp->if_softc; 343 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 344 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 345 ("%s: ifp=%p sc->refcount not >= 1: %d", 346 __func__, ifp, sc->refcount)); 347 free(elm, M_EPAIR); 348 } 349 } 350 EPAIR_UNLOCK(epair_dpcpu); 351 } 352} 353 354static int 355epair_add_ifp_for_draining(struct ifnet *ifp) 356{ 357 struct epair_dpcpu *epair_dpcpu; 358 struct epair_softc *sc; 359 struct epair_ifp_drain *elm = NULL; 360 361 sc = ifp->if_softc; 362 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 363 EPAIR_LOCK_ASSERT(epair_dpcpu); 364 STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next) 365 if (elm->ifp == ifp) 366 break; 367 /* If the ifp is there already, return success. */ 368 if (elm != NULL) 369 return (0); 370 371 elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO); 372 if (elm == NULL) 373 return (ENOMEM); 374 375 elm->ifp = ifp; 376 /* Add a reference for the ifp pointer on the list. */ 377 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 378 STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next); 379 380 return (0); 381} 382 383static void 384epair_start_locked(struct ifnet *ifp) 385{ 386 struct epair_dpcpu *epair_dpcpu; 387 struct mbuf *m; 388 struct epair_softc *sc; 389 struct ifnet *oifp; 390 int error; 391 392 DPRINTF("ifp=%p\n", ifp); 393 sc = ifp->if_softc; 394 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 395 EPAIR_LOCK_ASSERT(epair_dpcpu); 396 397 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 398 return; 399 if ((ifp->if_flags & IFF_UP) == 0) 400 return; 401 402 /* 403 * We get patckets here from ether_output via if_handoff() 404 * and ned to put them into the input queue of the oifp 405 * and call oifp->if_input() via netisr/epair_sintr(). 406 */ 407 oifp = sc->oifp; 408 sc = oifp->if_softc; 409 for (;;) { 410 IFQ_DEQUEUE(&ifp->if_snd, m); 411 if (m == NULL) 412 break; 413 BPF_MTAP(ifp, m); 414 415 /* 416 * In case the outgoing interface is not usable, 417 * drop the packet. 418 */ 419 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 420 (oifp->if_flags & IFF_UP) ==0) { 421 ifp->if_oerrors++; 422 m_freem(m); 423 continue; 424 } 425 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 426 427 /* 428 * Add a reference so the interface cannot go while the 429 * packet is in transit as we rely on rcvif to stay valid. 430 */ 431 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 432 m->m_pkthdr.rcvif = oifp; 433 CURVNET_SET_QUIET(oifp->if_vnet); 434 error = netisr_queue(NETISR_EPAIR, m); 435 CURVNET_RESTORE(); 436 if (!error) { 437 ifp->if_opackets++; 438 /* Someone else received the packet. */ 439 oifp->if_ipackets++; 440 } else { 441 /* The packet was freed already. */ 442 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 443 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 444 (void) epair_add_ifp_for_draining(ifp); 445 ifp->if_oerrors++; 446 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 447 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 448 ("%s: ifp=%p sc->refcount not >= 1: %d", 449 __func__, oifp, sc->refcount)); 450 } 451 } 452} 453 454static void 455epair_start(struct ifnet *ifp) 456{ 457 struct epair_dpcpu *epair_dpcpu; 458 459 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 460 EPAIR_LOCK(epair_dpcpu); 461 epair_start_locked(ifp); 462 EPAIR_UNLOCK(epair_dpcpu); 463} 464 465static int 466epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) 467{ 468 struct epair_dpcpu *epair_dpcpu; 469 struct epair_softc *sc; 470 struct ifnet *oifp; 471 int error, len; 472 short mflags; 473 474 DPRINTF("ifp=%p m=%p\n", ifp, m); 475 sc = ifp->if_softc; 476 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 477 EPAIR_LOCK_ASSERT(epair_dpcpu); 478 479 if (m == NULL) 480 return (0); 481 482 /* 483 * We are not going to use the interface en/dequeue mechanism 484 * on the TX side. We are called from ether_output_frame() 485 * and will put the packet into the incoming queue of the 486 * other interface of our pair via the netsir. 487 */ 488 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 489 m_freem(m); 490 return (ENXIO); 491 } 492 if ((ifp->if_flags & IFF_UP) == 0) { 493 m_freem(m); 494 return (ENETDOWN); 495 } 496 497 BPF_MTAP(ifp, m); 498 499 /* 500 * In case the outgoing interface is not usable, 501 * drop the packet. 502 */ 503 oifp = sc->oifp; 504 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 505 (oifp->if_flags & IFF_UP) ==0) { 506 ifp->if_oerrors++; 507 m_freem(m); 508 return (0); 509 } 510 len = m->m_pkthdr.len; 511 mflags = m->m_flags; 512 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 513 514#ifdef ALTQ 515 /* Support ALTQ via the clasic if_start() path. */ 516 IF_LOCK(&ifp->if_snd); 517 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 518 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 519 if (error) 520 ifp->if_snd.ifq_drops++; 521 IF_UNLOCK(&ifp->if_snd); 522 if (!error) { 523 ifp->if_obytes += len; 524 if (mflags & (M_BCAST|M_MCAST)) 525 ifp->if_omcasts++; 526 527 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) 528 epair_start_locked(ifp); 529 else 530 (void)epair_add_ifp_for_draining(ifp); 531 } 532 return (error); 533 } 534 IF_UNLOCK(&ifp->if_snd); 535#endif 536 537 if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) { 538 /* 539 * Our hardware queue is full, try to fall back 540 * queuing to the ifq but do not call ifp->if_start. 541 * Either we are lucky or the packet is gone. 542 */ 543 IFQ_ENQUEUE(&ifp->if_snd, m, error); 544 if (!error) 545 (void)epair_add_ifp_for_draining(ifp); 546 return (error); 547 } 548 sc = oifp->if_softc; 549 /* 550 * Add a reference so the interface cannot go while the 551 * packet is in transit as we rely on rcvif to stay valid. 552 */ 553 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 554 m->m_pkthdr.rcvif = oifp; 555 CURVNET_SET_QUIET(oifp->if_vnet); 556 error = netisr_queue(NETISR_EPAIR, m); 557 CURVNET_RESTORE(); 558 if (!error) { 559 ifp->if_opackets++; 560 /* 561 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, 562 * but as we bypass all this we have to duplicate 563 * the logic another time. 564 */ 565 ifp->if_obytes += len; 566 if (mflags & (M_BCAST|M_MCAST)) 567 ifp->if_omcasts++; 568 /* Someone else received the packet. */ 569 oifp->if_ipackets++; 570 } else { 571 /* The packet was freed already. */ 572 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 573 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 574 ifp->if_oerrors++; 575 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 576 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 577 ("%s: ifp=%p sc->refcount not >= 1: %d", 578 __func__, oifp, sc->refcount)); 579 } 580 581 return (error); 582} 583 584static int 585epair_transmit(struct ifnet *ifp, struct mbuf *m) 586{ 587 struct epair_dpcpu *epair_dpcpu; 588 int error; 589 590 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 591 EPAIR_LOCK(epair_dpcpu); 592 error = epair_transmit_locked(ifp, m); 593 EPAIR_UNLOCK(epair_dpcpu); 594 return (error); 595} 596 597static void 598epair_qflush(struct ifnet *ifp) 599{ 600 struct epair_softc *sc; 601 602 sc = ifp->if_softc; 603 KASSERT(sc != NULL, ("%s: ifp=%p, epair_softc gone? sc=%p\n", 604 __func__, ifp, sc)); 605 /* 606 * Remove this ifp from all backpointer lists. The interface will not 607 * usable for flushing anyway nor should it have anything to flush 608 * after if_qflush(). 609 */ 610 epair_remove_ifp_from_draining(ifp); 611 612 if (sc->if_qflush) 613 sc->if_qflush(ifp); 614} 615 616static int 617epair_media_change(struct ifnet *ifp __unused) 618{ 619 620 /* Do nothing. */ 621 return (0); 622} 623 624static void 625epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) 626{ 627 628 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 629 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; 630} 631 632static int 633epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 634{ 635 struct epair_softc *sc; 636 struct ifreq *ifr; 637 int error; 638 639 ifr = (struct ifreq *)data; 640 switch (cmd) { 641 case SIOCSIFFLAGS: 642 case SIOCADDMULTI: 643 case SIOCDELMULTI: 644 error = 0; 645 break; 646 647 case SIOCSIFMEDIA: 648 case SIOCGIFMEDIA: 649 sc = ifp->if_softc; 650 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); 651 break; 652 653 case SIOCSIFMTU: 654 /* We basically allow all kinds of MTUs. */ 655 ifp->if_mtu = ifr->ifr_mtu; 656 error = 0; 657 break; 658 659 default: 660 /* Let the common ethernet handler process this. */ 661 error = ether_ioctl(ifp, cmd, data); 662 break; 663 } 664 665 return (error); 666} 667 668static void 669epair_init(void *dummy __unused) 670{ 671} 672 673 674/* 675 * Interface cloning functions. 676 * We use our private ones so that we can create/destroy our secondary 677 * device along with the primary one. 678 */ 679static int 680epair_clone_match(struct if_clone *ifc, const char *name) 681{ 682 const char *cp; 683 684 DPRINTF("name='%s'\n", name); 685 686 /* 687 * Our base name is epair. 688 * Our interfaces will be named epair<n>[ab]. 689 * So accept anything of the following list: 690 * - epair 691 * - epair<n> 692 * but not the epair<n>[ab] versions. 693 */ 694 if (strncmp(epairname, name, sizeof(epairname)-1) != 0) 695 return (0); 696 697 for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { 698 if (*cp < '0' || *cp > '9') 699 return (0); 700 } 701 702 return (1); 703} 704 705static int 706epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 707{ 708 struct epair_softc *sca, *scb; 709 struct ifnet *ifp; 710 char *dp; 711 int error, unit, wildcard; 712 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 713 714 /* 715 * We are abusing params to create our second interface. 716 * Actually we already created it and called if_clone_create() 717 * for it to do the official insertion procedure the moment we knew 718 * it cannot fail anymore. So just do attach it here. 719 */ 720 if (params) { 721 scb = (struct epair_softc *)params; 722 ifp = scb->ifp; 723 /* Assign a hopefully unique, locally administered etheraddr. */ 724 eaddr[0] = 0x02; 725 eaddr[3] = (ifp->if_index >> 8) & 0xff; 726 eaddr[4] = ifp->if_index & 0xff; 727 eaddr[5] = 0x0b; 728 ether_ifattach(ifp, eaddr); 729 /* Correctly set the name for the cloner list. */ 730 strlcpy(name, scb->ifp->if_xname, len); 731 return (0); 732 } 733 734 /* Try to see if a special unit was requested. */ 735 error = ifc_name2unit(name, &unit); 736 if (error != 0) 737 return (error); 738 wildcard = (unit < 0); 739 740 error = ifc_alloc_unit(ifc, &unit); 741 if (error != 0) 742 return (error); 743 744 /* 745 * If no unit had been given, we need to adjust the ifName. 746 * Also make sure there is space for our extra [ab] suffix. 747 */ 748 for (dp = name; *dp != '\0'; dp++); 749 if (wildcard) { 750 error = snprintf(dp, len - (dp - name), "%d", unit); 751 if (error > len - (dp - name) - 1) { 752 /* ifName too long. */ 753 ifc_free_unit(ifc, unit); 754 return (ENOSPC); 755 } 756 dp += error; 757 } 758 if (len - (dp - name) - 1 < 1) { 759 /* No space left for our [ab] suffix. */ 760 ifc_free_unit(ifc, unit); 761 return (ENOSPC); 762 } 763 *dp = 'b'; 764 /* Must not change dp so we can replace 'a' by 'b' later. */ 765 *(dp+1) = '\0'; 766 767 /* Check if 'a' and 'b' interfaces already exist. */ 768 if (ifunit(name) != NULL) 769 return (EEXIST); 770 *dp = 'a'; 771 if (ifunit(name) != NULL) 772 return (EEXIST); 773 774 /* Allocate memory for both [ab] interfaces */ 775 sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 776 EPAIR_REFCOUNT_INIT(&sca->refcount, 1); 777 sca->ifp = if_alloc(IFT_ETHER); 778 if (sca->ifp == NULL) { 779 free(sca, M_EPAIR); 780 ifc_free_unit(ifc, unit); 781 return (ENOSPC); 782 } 783 784 scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 785 EPAIR_REFCOUNT_INIT(&scb->refcount, 1); 786 scb->ifp = if_alloc(IFT_ETHER); 787 if (scb->ifp == NULL) { 788 free(scb, M_EPAIR); 789 if_free(sca->ifp); 790 free(sca, M_EPAIR); 791 ifc_free_unit(ifc, unit); 792 return (ENOSPC); 793 } 794 795 /* 796 * Cross-reference the interfaces so we will be able to free both. 797 */ 798 sca->oifp = scb->ifp; 799 scb->oifp = sca->ifp; 800 801 /* 802 * Calculate the cpuid for netisr queueing based on the 803 * ifIndex of the interfaces. As long as we cannot configure 804 * this or use cpuset information easily we cannot guarantee 805 * cache locality but we can at least allow parallelism. 806 */ 807 sca->cpuid = 808 netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount()); 809 scb->cpuid = 810 netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount()); 811 812 /* Initialise pseudo media types. */ 813 ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status); 814 ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL); 815 ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T); 816 ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status); 817 ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL); 818 ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T); 819 820 /* Finish initialization of interface <n>a. */ 821 ifp = sca->ifp; 822 ifp->if_softc = sca; 823 strlcpy(ifp->if_xname, name, IFNAMSIZ); 824 ifp->if_dname = epairname; 825 ifp->if_dunit = unit; 826 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 827 ifp->if_capabilities = IFCAP_VLAN_MTU; 828 ifp->if_capenable = IFCAP_VLAN_MTU; 829 ifp->if_start = epair_start; 830 ifp->if_ioctl = epair_ioctl; 831 ifp->if_init = epair_init; 832 ifp->if_snd.ifq_maxlen = ifqmaxlen; 833 /* Assign a hopefully unique, locally administered etheraddr. */ 834 eaddr[0] = 0x02; 835 eaddr[3] = (ifp->if_index >> 8) & 0xff; 836 eaddr[4] = ifp->if_index & 0xff; 837 eaddr[5] = 0x0a; 838 ether_ifattach(ifp, eaddr); 839 sca->if_qflush = ifp->if_qflush; 840 ifp->if_qflush = epair_qflush; 841 ifp->if_transmit = epair_transmit; 842 if_initbaudrate(ifp, IF_Gbps(10)); /* arbitrary maximum */ 843 844 /* Swap the name and finish initialization of interface <n>b. */ 845 *dp = 'b'; 846 847 ifp = scb->ifp; 848 ifp->if_softc = scb; 849 strlcpy(ifp->if_xname, name, IFNAMSIZ); 850 ifp->if_dname = epairname; 851 ifp->if_dunit = unit; 852 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 853 ifp->if_capabilities = IFCAP_VLAN_MTU; 854 ifp->if_capenable = IFCAP_VLAN_MTU; 855 ifp->if_start = epair_start; 856 ifp->if_ioctl = epair_ioctl; 857 ifp->if_init = epair_init; 858 ifp->if_snd.ifq_maxlen = ifqmaxlen; 859 /* We need to play some tricks here for the second interface. */ 860 strlcpy(name, epairname, len); 861 error = if_clone_create(name, len, (caddr_t)scb); 862 if (error) 863 panic("%s: if_clone_create() for our 2nd iface failed: %d", 864 __func__, error); 865 scb->if_qflush = ifp->if_qflush; 866 ifp->if_qflush = epair_qflush; 867 ifp->if_transmit = epair_transmit; 868 if_initbaudrate(ifp, IF_Gbps(10)); /* arbitrary maximum */ 869 870 /* 871 * Restore name to <n>a as the ifp for this will go into the 872 * cloner list for the initial call. 873 */ 874 strlcpy(name, sca->ifp->if_xname, len); 875 DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb); 876 877 /* Tell the world, that we are ready to rock. */ 878 sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; 879 scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; 880 if_link_state_change(sca->ifp, LINK_STATE_UP); 881 if_link_state_change(scb->ifp, LINK_STATE_UP); 882 883 return (0); 884} 885 886static int 887epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) 888{ 889 struct ifnet *oifp; 890 struct epair_softc *sca, *scb; 891 int unit, error; 892 893 DPRINTF("ifp=%p\n", ifp); 894 895 /* 896 * In case we called into if_clone_destroyif() ourselves 897 * again to remove the second interface, the softc will be 898 * NULL. In that case so not do anything but return success. 899 */ 900 if (ifp->if_softc == NULL) 901 return (0); 902 903 unit = ifp->if_dunit; 904 sca = ifp->if_softc; 905 oifp = sca->oifp; 906 scb = oifp->if_softc; 907 908 DPRINTF("ifp=%p oifp=%p\n", ifp, oifp); 909 if_link_state_change(ifp, LINK_STATE_DOWN); 910 if_link_state_change(oifp, LINK_STATE_DOWN); 911 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 912 oifp->if_drv_flags &= ~IFF_DRV_RUNNING; 913 914 /* 915 * Get rid of our second half. As the other of the two 916 * interfaces may reside in a different vnet, we need to 917 * switch before freeing them. 918 */ 919 CURVNET_SET_QUIET(oifp->if_vnet); 920 ether_ifdetach(oifp); 921 /* 922 * Wait for all packets to be dispatched to if_input. 923 * The numbers can only go down as the interface is 924 * detached so there is no need to use atomics. 925 */ 926 DPRINTF("scb refcnt=%u\n", scb->refcount); 927 EPAIR_REFCOUNT_ASSERT(scb->refcount == 1, 928 ("%s: ifp=%p scb->refcount!=1: %d", __func__, oifp, scb->refcount)); 929 oifp->if_softc = NULL; 930 error = if_clone_destroyif(ifc, oifp); 931 if (error) 932 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 933 __func__, error); 934 if_free(oifp); 935 ifmedia_removeall(&scb->media); 936 free(scb, M_EPAIR); 937 CURVNET_RESTORE(); 938 939 ether_ifdetach(ifp); 940 /* 941 * Wait for all packets to be dispatched to if_input. 942 */ 943 DPRINTF("sca refcnt=%u\n", sca->refcount); 944 EPAIR_REFCOUNT_ASSERT(sca->refcount == 1, 945 ("%s: ifp=%p sca->refcount!=1: %d", __func__, ifp, sca->refcount)); 946 if_free(ifp); 947 ifmedia_removeall(&sca->media); 948 free(sca, M_EPAIR); 949 ifc_free_unit(ifc, unit); 950 951 return (0); 952} 953 954static void 955vnet_epair_init(const void *unused __unused) 956{ 957 958 V_epair_cloner = if_clone_advanced(epairname, 0, 959 epair_clone_match, epair_clone_create, epair_clone_destroy); 960} 961VNET_SYSINIT(vnet_epair_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 962 vnet_epair_init, NULL); 963 964static void 965vnet_epair_uninit(const void *unused __unused) 966{ 967 968 if_clone_detach(V_epair_cloner); 969} 970VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 971 vnet_epair_uninit, NULL); 972 973static int 974epair_modevent(module_t mod, int type, void *data) 975{ 976 int qlimit; 977 978 switch (type) { 979 case MOD_LOAD: 980 /* For now limit us to one global mutex and one inq. */ 981 epair_dpcpu_init(); 982 epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */ 983 if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit)) 984 epair_nh.nh_qlimit = qlimit; 985 netisr_register(&epair_nh); 986 if (bootverbose) 987 printf("%s initialized.\n", epairname); 988 break; 989 case MOD_UNLOAD: 990 netisr_unregister(&epair_nh); 991 epair_dpcpu_detach(); 992 if (bootverbose) 993 printf("%s unloaded.\n", epairname); 994 break; 995 default: 996 return (EOPNOTSUPP); 997 } 998 return (0); 999} 1000 1001static moduledata_t epair_mod = { 1002 "if_epair", 1003 epair_modevent, 1004 0 1005}; 1006 1007DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 1008MODULE_VERSION(if_epair, 1); 1009