1262152Sluigi/* 2262152Sluigi * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. 3262152Sluigi * 4262152Sluigi * Redistribution and use in source and binary forms, with or without 5262152Sluigi * modification, are permitted provided that the following conditions 6262152Sluigi * are met: 7262152Sluigi * 1. Redistributions of source code must retain the above copyright 8262152Sluigi * notice, this list of conditions and the following disclaimer. 9262152Sluigi * 2. Redistributions in binary form must reproduce the above copyright 10262152Sluigi * notice, this list of conditions and the following disclaimer in the 11262152Sluigi * documentation and/or other materials provided with the distribution. 12262152Sluigi * 13262152Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14262152Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15262152Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16262152Sluigi * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17262152Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18262152Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19262152Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20262152Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21262152Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22262152Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23262152Sluigi * SUCH DAMAGE. 24262152Sluigi */ 25262152Sluigi 26262152Sluigi 27262152Sluigi/* 28262152Sluigi * This module implements the VALE switch for netmap 29262152Sluigi 30262152Sluigi--- VALE SWITCH --- 31262152Sluigi 32262152SluigiNMG_LOCK() serializes all modifications to switches and ports. 
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring or deleting a port, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch.)

 */

/*
 * OS-specific code that is used only within this file.
54262152Sluigi * Other OS-specific code that must be accessed by drivers 55262152Sluigi * is present in netmap_kern.h 56262152Sluigi */ 57262152Sluigi 58262152Sluigi#if defined(__FreeBSD__) 59262152Sluigi#include <sys/cdefs.h> /* prerequisite */ 60262152Sluigi__FBSDID("$FreeBSD$"); 61262152Sluigi 62262152Sluigi#include <sys/types.h> 63262152Sluigi#include <sys/errno.h> 64262152Sluigi#include <sys/param.h> /* defines used in kernel.h */ 65262152Sluigi#include <sys/kernel.h> /* types used in module initialization */ 66262152Sluigi#include <sys/conf.h> /* cdevsw struct, UID, GID */ 67262152Sluigi#include <sys/sockio.h> 68262152Sluigi#include <sys/socketvar.h> /* struct socket */ 69262152Sluigi#include <sys/malloc.h> 70262152Sluigi#include <sys/poll.h> 71262152Sluigi#include <sys/rwlock.h> 72262152Sluigi#include <sys/socket.h> /* sockaddrs */ 73262152Sluigi#include <sys/selinfo.h> 74262152Sluigi#include <sys/sysctl.h> 75262152Sluigi#include <net/if.h> 76262152Sluigi#include <net/if_var.h> 77262152Sluigi#include <net/bpf.h> /* BIOCIMMEDIATE */ 78262152Sluigi#include <machine/bus.h> /* bus_dmamap_* */ 79262152Sluigi#include <sys/endian.h> 80262152Sluigi#include <sys/refcount.h> 81262152Sluigi 82262152Sluigi 83262152Sluigi#define BDG_RWLOCK_T struct rwlock // struct rwlock 84262152Sluigi 85262152Sluigi#define BDG_RWINIT(b) \ 86262152Sluigi rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS) 87262152Sluigi#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock) 88262152Sluigi#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock) 89262152Sluigi#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock) 90262152Sluigi#define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock) 91262152Sluigi#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock) 92262152Sluigi#define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock) 93262152Sluigi 94262152Sluigi 95262152Sluigi#elif defined(linux) 96262152Sluigi 97262152Sluigi#include "bsd_glue.h" 98262152Sluigi 99262152Sluigi#elif defined(__APPLE__) 100262152Sluigi 
101262152Sluigi#warning OSX support is only partial 102262152Sluigi#include "osx_glue.h" 103262152Sluigi 104262152Sluigi#else 105262152Sluigi 106262152Sluigi#error Unsupported platform 107262152Sluigi 108262152Sluigi#endif /* unsupported */ 109262152Sluigi 110262152Sluigi/* 111262152Sluigi * common headers 112262152Sluigi */ 113262152Sluigi 114262152Sluigi#include <net/netmap.h> 115262152Sluigi#include <dev/netmap/netmap_kern.h> 116262152Sluigi#include <dev/netmap/netmap_mem2.h> 117262152Sluigi 118262152Sluigi#ifdef WITH_VALE 119262152Sluigi 120262152Sluigi/* 121262152Sluigi * system parameters (most of them in netmap_kern.h) 122262152Sluigi * NM_NAME prefix for switch port names, default "vale" 123262152Sluigi * NM_BDG_MAXPORTS number of ports 124262152Sluigi * NM_BRIDGES max number of switches in the system. 125262152Sluigi * XXX should become a sysctl or tunable 126262152Sluigi * 127262152Sluigi * Switch ports are named valeX:Y where X is the switch name and Y 128262152Sluigi * is the port. If Y matches a physical interface name, the port is 129262152Sluigi * connected to a physical device. 130262152Sluigi * 131262152Sluigi * Unlike physical interfaces, switch ports use their own memory region 132262152Sluigi * for rings and buffers. 133262152Sluigi * The virtual interfaces use per-queue lock instead of core lock. 134262152Sluigi * In the tx loop, we aggregate traffic in batches to make all operations 135262152Sluigi * faster. The batch size is bridge_batch. 136262152Sluigi */ 137262152Sluigi#define NM_BDG_MAXRINGS 16 /* XXX unclear how many. 
*/
#define NM_BDG_MAXSLOTS	4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
#define NM_BDG_HASH		1024	/* forwarding table entries */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */
/*
 * Actual size of the per-ring tables: a full batch plus room for one
 * maximum-length multi-segment chain.
 */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
/*
 * NM_FT_NULL terminates a list of slots in the ft. It equals the table
 * size, i.e. one past the last valid index, and is the value the 16-bit
 * bq_head/bq_tail fields of struct nm_bdg_q are initialised to.
 */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
#define	NM_BRIDGES		8	/* number of bridges */


/*
 * bridge_batch is set via sysctl to the max batch size to be
 * used in the bridge. The actual value may be larger as the
 * last packet in the block may overflow the size.
 *
 * NOTE(review): the sysctl below is a plain read/write integer with no
 * range check visible here; presumably a value above NM_BDG_BATCH would
 * exceed the tables sized by NM_BDG_BATCH_MAX - confirm against the
 * forwarding path.
 */
int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");


/*
 * Forward declarations: these functions are referenced before the point
 * where they are defined (netmap_vp_create by nm_vi_create and
 * netmap_get_bdg_na, netmap_vp_reg by nm_vi_destroy).
 */
static int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **);
static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
static int netmap_bwrap_register(struct netmap_adapter *, int onoff);

/*
 * For each output interface, nm_bdg_q is used to construct a list.
 * bq_len is the number of output buffers (we can have coalescing
 * during the copy).
168262152Sluigi */ 169262152Sluigistruct nm_bdg_q { 170262152Sluigi uint16_t bq_head; 171262152Sluigi uint16_t bq_tail; 172262152Sluigi uint32_t bq_len; /* number of buffers */ 173262152Sluigi}; 174262152Sluigi 175262152Sluigi/* XXX revise this */ 176262152Sluigistruct nm_hash_ent { 177262152Sluigi uint64_t mac; /* the top 2 bytes are the epoch */ 178262152Sluigi uint64_t ports; 179262152Sluigi}; 180262152Sluigi 181262152Sluigi/* 182262152Sluigi * nm_bridge is a descriptor for a VALE switch. 183262152Sluigi * Interfaces for a bridge are all in bdg_ports[]. 184262152Sluigi * The array has fixed size, an empty entry does not terminate 185262152Sluigi * the search, but lookups only occur on attach/detach so we 186262152Sluigi * don't mind if they are slow. 187262152Sluigi * 188262152Sluigi * The bridge is non blocking on the transmit ports: excess 189262152Sluigi * packets are dropped if there is no room on the output port. 190262152Sluigi * 191262152Sluigi * bdg_lock protects accesses to the bdg_ports array. 192262152Sluigi * This is a rw lock (or equivalent). 193262152Sluigi */ 194262152Sluigistruct nm_bridge { 195262152Sluigi /* XXX what is the proper alignment/layout ? */ 196262152Sluigi BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */ 197262152Sluigi int bdg_namelen; 198262152Sluigi uint32_t bdg_active_ports; /* 0 means free */ 199262152Sluigi char bdg_basename[IFNAMSIZ]; 200262152Sluigi 201262152Sluigi /* Indexes of active ports (up to active_ports) 202262152Sluigi * and all other remaining ports. 203262152Sluigi */ 204262152Sluigi uint8_t bdg_port_index[NM_BDG_MAXPORTS]; 205262152Sluigi 206262152Sluigi struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS]; 207262152Sluigi 208262152Sluigi 209262152Sluigi /* 210262152Sluigi * The function to decide the destination port. 211262152Sluigi * It returns either of an index of the destination port, 212262152Sluigi * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to 213262152Sluigi * forward this packet. 
ring_nr is the source ring index, and the 214262152Sluigi * function may overwrite this value to forward this packet to a 215262152Sluigi * different ring index. 216262152Sluigi * This function must be set by netmap_bdgctl(). 217262152Sluigi */ 218270252Sluigi struct netmap_bdg_ops bdg_ops; 219262152Sluigi 220262152Sluigi /* the forwarding table, MAC+ports. 221262152Sluigi * XXX should be changed to an argument to be passed to 222262152Sluigi * the lookup function, and allocated on attach 223262152Sluigi */ 224262152Sluigi struct nm_hash_ent ht[NM_BDG_HASH]; 225262152Sluigi}; 226262152Sluigi 227270252Sluigiconst char* 228270252Sluiginetmap_bdg_name(struct netmap_vp_adapter *vp) 229270252Sluigi{ 230270252Sluigi struct nm_bridge *b = vp->na_bdg; 231270252Sluigi if (b == NULL) 232270252Sluigi return NULL; 233270252Sluigi return b->bdg_basename; 234270252Sluigi} 235262152Sluigi 236270252Sluigi 237262152Sluigi/* 238262152Sluigi * XXX in principle nm_bridges could be created dynamically 239262152Sluigi * Right now we have a static array and deletions are protected 240262152Sluigi * by an exclusive lock. 241262152Sluigi */ 242262152Sluigistruct nm_bridge nm_bridges[NM_BRIDGES]; 243262152Sluigi 244262152Sluigi 245262152Sluigi/* 246262152Sluigi * this is a slightly optimized copy routine which rounds 247262152Sluigi * to multiple of 64 bytes and is often faster than dealing 248262152Sluigi * with other odd sizes. We assume there is enough room 249262152Sluigi * in the source and destination buffers. 250262152Sluigi * 251262152Sluigi * XXX only for multiples of 64 bytes, non overlapped. 
252262152Sluigi */ 253262152Sluigistatic inline void 254262152Sluigipkt_copy(void *_src, void *_dst, int l) 255262152Sluigi{ 256262152Sluigi uint64_t *src = _src; 257262152Sluigi uint64_t *dst = _dst; 258262152Sluigi if (unlikely(l >= 1024)) { 259262152Sluigi memcpy(dst, src, l); 260262152Sluigi return; 261262152Sluigi } 262262152Sluigi for (; likely(l > 0); l-=64) { 263262152Sluigi *dst++ = *src++; 264262152Sluigi *dst++ = *src++; 265262152Sluigi *dst++ = *src++; 266262152Sluigi *dst++ = *src++; 267262152Sluigi *dst++ = *src++; 268262152Sluigi *dst++ = *src++; 269262152Sluigi *dst++ = *src++; 270262152Sluigi *dst++ = *src++; 271262152Sluigi } 272262152Sluigi} 273262152Sluigi 274262152Sluigi 275262152Sluigi/* 276262152Sluigi * locate a bridge among the existing ones. 277262152Sluigi * MUST BE CALLED WITH NMG_LOCK() 278262152Sluigi * 279262152Sluigi * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME. 280262152Sluigi * We assume that this is called with a name of at least NM_NAME chars. 281262152Sluigi */ 282262152Sluigistatic struct nm_bridge * 283262152Sluiginm_find_bridge(const char *name, int create) 284262152Sluigi{ 285262152Sluigi int i, l, namelen; 286262152Sluigi struct nm_bridge *b = NULL; 287262152Sluigi 288262152Sluigi NMG_LOCK_ASSERT(); 289262152Sluigi 290262152Sluigi namelen = strlen(NM_NAME); /* base length */ 291262152Sluigi l = name ? strlen(name) : 0; /* actual length */ 292262152Sluigi if (l < namelen) { 293262152Sluigi D("invalid bridge name %s", name ? 
name : NULL); 294262152Sluigi return NULL; 295262152Sluigi } 296262152Sluigi for (i = namelen + 1; i < l; i++) { 297262152Sluigi if (name[i] == ':') { 298262152Sluigi namelen = i; 299262152Sluigi break; 300262152Sluigi } 301262152Sluigi } 302262152Sluigi if (namelen >= IFNAMSIZ) 303262152Sluigi namelen = IFNAMSIZ; 304262152Sluigi ND("--- prefix is '%.*s' ---", namelen, name); 305262152Sluigi 306262152Sluigi /* lookup the name, remember empty slot if there is one */ 307262152Sluigi for (i = 0; i < NM_BRIDGES; i++) { 308262152Sluigi struct nm_bridge *x = nm_bridges + i; 309262152Sluigi 310262152Sluigi if (x->bdg_active_ports == 0) { 311262152Sluigi if (create && b == NULL) 312262152Sluigi b = x; /* record empty slot */ 313262152Sluigi } else if (x->bdg_namelen != namelen) { 314262152Sluigi continue; 315262152Sluigi } else if (strncmp(name, x->bdg_basename, namelen) == 0) { 316262152Sluigi ND("found '%.*s' at %d", namelen, name, i); 317262152Sluigi b = x; 318262152Sluigi break; 319262152Sluigi } 320262152Sluigi } 321262152Sluigi if (i == NM_BRIDGES && b) { /* name not found, can create entry */ 322262152Sluigi /* initialize the bridge */ 323262152Sluigi strncpy(b->bdg_basename, name, namelen); 324262152Sluigi ND("create new bridge %s with ports %d", b->bdg_basename, 325262152Sluigi b->bdg_active_ports); 326262152Sluigi b->bdg_namelen = namelen; 327262152Sluigi b->bdg_active_ports = 0; 328262152Sluigi for (i = 0; i < NM_BDG_MAXPORTS; i++) 329262152Sluigi b->bdg_port_index[i] = i; 330262152Sluigi /* set the default function */ 331270252Sluigi b->bdg_ops.lookup = netmap_bdg_learning; 332262152Sluigi /* reset the MAC address table */ 333262152Sluigi bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); 334262152Sluigi } 335262152Sluigi return b; 336262152Sluigi} 337262152Sluigi 338262152Sluigi 339262152Sluigi/* 340262152Sluigi * Free the forwarding tables for rings attached to switch ports. 
341262152Sluigi */ 342262152Sluigistatic void 343262152Sluiginm_free_bdgfwd(struct netmap_adapter *na) 344262152Sluigi{ 345262152Sluigi int nrings, i; 346262152Sluigi struct netmap_kring *kring; 347262152Sluigi 348262152Sluigi NMG_LOCK_ASSERT(); 349262152Sluigi nrings = na->num_tx_rings; 350262152Sluigi kring = na->tx_rings; 351262152Sluigi for (i = 0; i < nrings; i++) { 352262152Sluigi if (kring[i].nkr_ft) { 353262152Sluigi free(kring[i].nkr_ft, M_DEVBUF); 354262152Sluigi kring[i].nkr_ft = NULL; /* protect from freeing twice */ 355262152Sluigi } 356262152Sluigi } 357262152Sluigi} 358262152Sluigi 359262152Sluigi 360262152Sluigi/* 361262152Sluigi * Allocate the forwarding tables for the rings attached to the bridge ports. 362262152Sluigi */ 363262152Sluigistatic int 364262152Sluiginm_alloc_bdgfwd(struct netmap_adapter *na) 365262152Sluigi{ 366262152Sluigi int nrings, l, i, num_dstq; 367262152Sluigi struct netmap_kring *kring; 368262152Sluigi 369262152Sluigi NMG_LOCK_ASSERT(); 370262152Sluigi /* all port:rings + broadcast */ 371262152Sluigi num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1; 372262152Sluigi l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX; 373262152Sluigi l += sizeof(struct nm_bdg_q) * num_dstq; 374262152Sluigi l += sizeof(uint16_t) * NM_BDG_BATCH_MAX; 375262152Sluigi 376262152Sluigi nrings = netmap_real_tx_rings(na); 377262152Sluigi kring = na->tx_rings; 378262152Sluigi for (i = 0; i < nrings; i++) { 379262152Sluigi struct nm_bdg_fwd *ft; 380262152Sluigi struct nm_bdg_q *dstq; 381262152Sluigi int j; 382262152Sluigi 383262152Sluigi ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO); 384262152Sluigi if (!ft) { 385262152Sluigi nm_free_bdgfwd(na); 386262152Sluigi return ENOMEM; 387262152Sluigi } 388262152Sluigi dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); 389262152Sluigi for (j = 0; j < num_dstq; j++) { 390262152Sluigi dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL; 391262152Sluigi dstq[j].bq_len = 0; 392262152Sluigi } 393262152Sluigi kring[i].nkr_ft = ft; 
394262152Sluigi } 395262152Sluigi return 0; 396262152Sluigi} 397262152Sluigi 398262152Sluigi 399270252Sluigi/* remove from bridge b the ports in slots hw and sw 400270252Sluigi * (sw can be -1 if not needed) 401270252Sluigi */ 402262152Sluigistatic void 403262152Sluiginetmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) 404262152Sluigi{ 405262152Sluigi int s_hw = hw, s_sw = sw; 406262152Sluigi int i, lim =b->bdg_active_ports; 407262152Sluigi uint8_t tmp[NM_BDG_MAXPORTS]; 408262152Sluigi 409262152Sluigi /* 410262152Sluigi New algorithm: 411262152Sluigi make a copy of bdg_port_index; 412262152Sluigi lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port 413262152Sluigi in the array of bdg_port_index, replacing them with 414262152Sluigi entries from the bottom of the array; 415262152Sluigi decrement bdg_active_ports; 416262152Sluigi acquire BDG_WLOCK() and copy back the array. 417262152Sluigi */ 418262152Sluigi 419262152Sluigi if (netmap_verbose) 420262152Sluigi D("detach %d and %d (lim %d)", hw, sw, lim); 421262152Sluigi /* make a copy of the list of active ports, update it, 422262152Sluigi * and then copy back within BDG_WLOCK(). 
423262152Sluigi */ 424262152Sluigi memcpy(tmp, b->bdg_port_index, sizeof(tmp)); 425262152Sluigi for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) { 426262152Sluigi if (hw >= 0 && tmp[i] == hw) { 427262152Sluigi ND("detach hw %d at %d", hw, i); 428262152Sluigi lim--; /* point to last active port */ 429262152Sluigi tmp[i] = tmp[lim]; /* swap with i */ 430262152Sluigi tmp[lim] = hw; /* now this is inactive */ 431262152Sluigi hw = -1; 432262152Sluigi } else if (sw >= 0 && tmp[i] == sw) { 433262152Sluigi ND("detach sw %d at %d", sw, i); 434262152Sluigi lim--; 435262152Sluigi tmp[i] = tmp[lim]; 436262152Sluigi tmp[lim] = sw; 437262152Sluigi sw = -1; 438262152Sluigi } else { 439262152Sluigi i++; 440262152Sluigi } 441262152Sluigi } 442262152Sluigi if (hw >= 0 || sw >= 0) { 443262152Sluigi D("XXX delete failed hw %d sw %d, should panic...", hw, sw); 444262152Sluigi } 445262152Sluigi 446262152Sluigi BDG_WLOCK(b); 447270252Sluigi if (b->bdg_ops.dtor) 448270252Sluigi b->bdg_ops.dtor(b->bdg_ports[s_hw]); 449262152Sluigi b->bdg_ports[s_hw] = NULL; 450262152Sluigi if (s_sw >= 0) { 451262152Sluigi b->bdg_ports[s_sw] = NULL; 452262152Sluigi } 453262152Sluigi memcpy(b->bdg_port_index, tmp, sizeof(tmp)); 454262152Sluigi b->bdg_active_ports = lim; 455262152Sluigi BDG_WUNLOCK(b); 456262152Sluigi 457262152Sluigi ND("now %d active ports", lim); 458262152Sluigi if (lim == 0) { 459262152Sluigi ND("marking bridge %s as free", b->bdg_basename); 460270252Sluigi bzero(&b->bdg_ops, sizeof(b->bdg_ops)); 461262152Sluigi } 462262152Sluigi} 463262152Sluigi 464270252Sluigi/* nm_bdg_ctl callback for VALE ports */ 465270252Sluigistatic int 466270252Sluiginetmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) 467270252Sluigi{ 468270252Sluigi struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; 469270252Sluigi struct nm_bridge *b = vpna->na_bdg; 470262152Sluigi 471270252Sluigi if (attach) 472270252Sluigi return 0; /* nothing to do */ 473270252Sluigi if (b) { 474270252Sluigi 
netmap_set_all_rings(na, 0 /* disable */); 475270252Sluigi netmap_bdg_detach_common(b, vpna->bdg_port, -1); 476270252Sluigi vpna->na_bdg = NULL; 477270252Sluigi netmap_set_all_rings(na, 1 /* enable */); 478270252Sluigi } 479270252Sluigi /* I have took reference just for attach */ 480270252Sluigi netmap_adapter_put(na); 481270252Sluigi return 0; 482270252Sluigi} 483270252Sluigi 484270252Sluigi/* nm_dtor callback for ephemeral VALE ports */ 485262152Sluigistatic void 486270252Sluiginetmap_vp_dtor(struct netmap_adapter *na) 487262152Sluigi{ 488262152Sluigi struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na; 489262152Sluigi struct nm_bridge *b = vpna->na_bdg; 490262152Sluigi 491270252Sluigi ND("%s has %d references", na->name, na->na_refcount); 492262152Sluigi 493262152Sluigi if (b) { 494262152Sluigi netmap_bdg_detach_common(b, vpna->bdg_port, -1); 495262152Sluigi } 496270252Sluigi} 497262152Sluigi 498270252Sluigi/* nm_dtor callback for persistent VALE ports */ 499270252Sluigistatic void 500270252Sluiginetmap_persist_vp_dtor(struct netmap_adapter *na) 501270252Sluigi{ 502270252Sluigi struct ifnet *ifp = na->ifp; 503270252Sluigi 504270252Sluigi netmap_vp_dtor(na); 505262152Sluigi na->ifp = NULL; 506270252Sluigi nm_vi_detach(ifp); 507262152Sluigi} 508262152Sluigi 509270252Sluigi/* remove a persistent VALE port from the system */ 510270252Sluigistatic int 511270252Sluiginm_vi_destroy(const char *name) 512270252Sluigi{ 513270252Sluigi struct ifnet *ifp; 514270252Sluigi int error; 515262152Sluigi 516270252Sluigi ifp = ifunit_ref(name); 517270252Sluigi if (!ifp) 518270252Sluigi return ENXIO; 519270252Sluigi NMG_LOCK(); 520270252Sluigi /* make sure this is actually a VALE port */ 521270252Sluigi if (!NETMAP_CAPABLE(ifp) || NA(ifp)->nm_register != netmap_vp_reg) { 522270252Sluigi error = EINVAL; 523270252Sluigi goto err; 524270252Sluigi } 525270252Sluigi 526270252Sluigi if (NA(ifp)->na_refcount > 1) { 527270252Sluigi error = EBUSY; 528270252Sluigi goto err; 
529270252Sluigi } 530270252Sluigi NMG_UNLOCK(); 531270252Sluigi 532270252Sluigi D("destroying a persistent vale interface %s", ifp->if_xname); 533270252Sluigi /* Linux requires all the references are released 534270252Sluigi * before unregister 535270252Sluigi */ 536270252Sluigi if_rele(ifp); 537270252Sluigi netmap_detach(ifp); 538270252Sluigi return 0; 539270252Sluigi 540270252Sluigierr: 541270252Sluigi NMG_UNLOCK(); 542270252Sluigi if_rele(ifp); 543270252Sluigi return error; 544270252Sluigi} 545270252Sluigi 546270252Sluigi/* 547270252Sluigi * Create a virtual interface registered to the system. 548270252Sluigi * The interface will be attached to a bridge later. 549270252Sluigi */ 550270252Sluigistatic int 551270252Sluiginm_vi_create(struct nmreq *nmr) 552270252Sluigi{ 553270252Sluigi struct ifnet *ifp; 554270252Sluigi struct netmap_vp_adapter *vpna; 555270252Sluigi int error; 556270252Sluigi 557270252Sluigi /* don't include VALE prefix */ 558270252Sluigi if (!strncmp(nmr->nr_name, NM_NAME, strlen(NM_NAME))) 559270252Sluigi return EINVAL; 560270252Sluigi ifp = ifunit_ref(nmr->nr_name); 561270252Sluigi if (ifp) { /* already exist, cannot create new one */ 562270252Sluigi if_rele(ifp); 563270252Sluigi return EEXIST; 564270252Sluigi } 565270252Sluigi error = nm_vi_persist(nmr->nr_name, &ifp); 566270252Sluigi if (error) 567270252Sluigi return error; 568270252Sluigi 569270252Sluigi NMG_LOCK(); 570270252Sluigi /* netmap_vp_create creates a struct netmap_vp_adapter */ 571270252Sluigi error = netmap_vp_create(nmr, ifp, &vpna); 572270252Sluigi if (error) { 573270252Sluigi D("error %d", error); 574270252Sluigi nm_vi_detach(ifp); 575270252Sluigi return error; 576270252Sluigi } 577270252Sluigi /* persist-specific routines */ 578270252Sluigi vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl; 579270252Sluigi vpna->up.nm_dtor = netmap_persist_vp_dtor; 580270252Sluigi netmap_adapter_get(&vpna->up); 581270252Sluigi NMG_UNLOCK(); 582270252Sluigi D("created %s", ifp->if_xname); 
583270252Sluigi return 0; 584270252Sluigi} 585270252Sluigi 586262152Sluigi/* Try to get a reference to a netmap adapter attached to a VALE switch. 587262152Sluigi * If the adapter is found (or is created), this function returns 0, a 588262152Sluigi * non NULL pointer is returned into *na, and the caller holds a 589262152Sluigi * reference to the adapter. 590262152Sluigi * If an adapter is not found, then no reference is grabbed and the 591262152Sluigi * function returns an error code, or 0 if there is just a VALE prefix 592262152Sluigi * mismatch. Therefore the caller holds a reference when 593262152Sluigi * (*na != NULL && return == 0). 594262152Sluigi */ 595262152Sluigiint 596262152Sluiginetmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create) 597262152Sluigi{ 598270252Sluigi char *nr_name = nmr->nr_name; 599270252Sluigi const char *ifname; 600262152Sluigi struct ifnet *ifp; 601262152Sluigi int error = 0; 602270252Sluigi struct netmap_vp_adapter *vpna, *hostna = NULL; 603262152Sluigi struct nm_bridge *b; 604262152Sluigi int i, j, cand = -1, cand2 = -1; 605262152Sluigi int needed; 606262152Sluigi 607262152Sluigi *na = NULL; /* default return value */ 608262152Sluigi 609262152Sluigi /* first try to see if this is a bridge port. */ 610262152Sluigi NMG_LOCK_ASSERT(); 611270252Sluigi if (strncmp(nr_name, NM_NAME, sizeof(NM_NAME) - 1)) { 612262152Sluigi return 0; /* no error, but no VALE prefix */ 613262152Sluigi } 614262152Sluigi 615270252Sluigi b = nm_find_bridge(nr_name, create); 616262152Sluigi if (b == NULL) { 617270252Sluigi D("no bridges available for '%s'", nr_name); 618262152Sluigi return (create ? ENOMEM : ENXIO); 619262152Sluigi } 620270252Sluigi if (strlen(nr_name) < b->bdg_namelen) /* impossible */ 621270252Sluigi panic("x"); 622262152Sluigi 623262152Sluigi /* Now we are sure that name starts with the bridge's name, 624262152Sluigi * lookup the port in the bridge. We need to scan the entire 625262152Sluigi * list. 
It is not important to hold a WLOCK on the bridge 626262152Sluigi * during the search because NMG_LOCK already guarantees 627262152Sluigi * that there are no other possible writers. 628262152Sluigi */ 629262152Sluigi 630262152Sluigi /* lookup in the local list of ports */ 631262152Sluigi for (j = 0; j < b->bdg_active_ports; j++) { 632262152Sluigi i = b->bdg_port_index[j]; 633262152Sluigi vpna = b->bdg_ports[i]; 634262152Sluigi // KASSERT(na != NULL); 635270252Sluigi D("checking %s", vpna->up.name); 636270252Sluigi if (!strcmp(vpna->up.name, nr_name)) { 637262152Sluigi netmap_adapter_get(&vpna->up); 638270252Sluigi ND("found existing if %s refs %d", nr_name) 639270252Sluigi *na = &vpna->up; 640262152Sluigi return 0; 641262152Sluigi } 642262152Sluigi } 643262152Sluigi /* not found, should we create it? */ 644262152Sluigi if (!create) 645262152Sluigi return ENXIO; 646262152Sluigi /* yes we should, see if we have space to attach entries */ 647262152Sluigi needed = 2; /* in some cases we only need 1 */ 648262152Sluigi if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) { 649262152Sluigi D("bridge full %d, cannot create new port", b->bdg_active_ports); 650262152Sluigi return ENOMEM; 651262152Sluigi } 652262152Sluigi /* record the next two ports available, but do not allocate yet */ 653262152Sluigi cand = b->bdg_port_index[b->bdg_active_ports]; 654262152Sluigi cand2 = b->bdg_port_index[b->bdg_active_ports + 1]; 655262152Sluigi ND("+++ bridge %s port %s used %d avail %d %d", 656270252Sluigi b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2); 657262152Sluigi 658262152Sluigi /* 659262152Sluigi * try see if there is a matching NIC with this name 660262152Sluigi * (after the bridge's name) 661262152Sluigi */ 662270252Sluigi ifname = nr_name + b->bdg_namelen + 1; 663270252Sluigi ifp = ifunit_ref(ifname); 664270252Sluigi if (!ifp) { 665270252Sluigi /* Create an ephemeral virtual port 666270252Sluigi * This block contains all the ephemeral-specific logics 667270252Sluigi 
*/ 668262152Sluigi if (nmr->nr_cmd) { 669262152Sluigi /* nr_cmd must be 0 for a virtual port */ 670262152Sluigi return EINVAL; 671262152Sluigi } 672262152Sluigi 673262152Sluigi /* bdg_netmap_attach creates a struct netmap_adapter */ 674270252Sluigi error = netmap_vp_create(nmr, NULL, &vpna); 675262152Sluigi if (error) { 676262152Sluigi D("error %d", error); 677262152Sluigi free(ifp, M_DEVBUF); 678262152Sluigi return error; 679262152Sluigi } 680270252Sluigi /* shortcut - we can skip get_hw_na(), 681270252Sluigi * ownership check and nm_bdg_attach() 682270252Sluigi */ 683270252Sluigi } else { 684270252Sluigi struct netmap_adapter *hw; 685262152Sluigi 686270252Sluigi error = netmap_get_hw_na(ifp, &hw); 687270252Sluigi if (error || hw == NULL) 688262152Sluigi goto out; 689262152Sluigi 690270252Sluigi /* host adapter might not be created */ 691270252Sluigi error = hw->nm_bdg_attach(nr_name, hw); 692270252Sluigi if (error) 693262152Sluigi goto out; 694270252Sluigi vpna = hw->na_vp; 695270252Sluigi hostna = hw->na_hostvp; 696270252Sluigi if_rele(ifp); 697262152Sluigi if (nmr->nr_arg1 != NETMAP_BDG_HOST) 698270252Sluigi hostna = NULL; 699262152Sluigi } 700262152Sluigi 701262152Sluigi BDG_WLOCK(b); 702262152Sluigi vpna->bdg_port = cand; 703262152Sluigi ND("NIC %p to bridge port %d", vpna, cand); 704262152Sluigi /* bind the port to the bridge (virtual ports are not active) */ 705262152Sluigi b->bdg_ports[cand] = vpna; 706262152Sluigi vpna->na_bdg = b; 707262152Sluigi b->bdg_active_ports++; 708270252Sluigi if (hostna != NULL) { 709262152Sluigi /* also bind the host stack to the bridge */ 710262152Sluigi b->bdg_ports[cand2] = hostna; 711262152Sluigi hostna->bdg_port = cand2; 712262152Sluigi hostna->na_bdg = b; 713262152Sluigi b->bdg_active_ports++; 714262152Sluigi ND("host %p to bridge port %d", hostna, cand2); 715262152Sluigi } 716270252Sluigi ND("if %s refs %d", ifname, vpna->up.na_refcount); 717262152Sluigi BDG_WUNLOCK(b); 718270252Sluigi *na = &vpna->up; 719270252Sluigi 
netmap_adapter_get(*na); 720262152Sluigi return 0; 721262152Sluigi 722262152Sluigiout: 723262152Sluigi if_rele(ifp); 724262152Sluigi 725262152Sluigi return error; 726262152Sluigi} 727262152Sluigi 728262152Sluigi 729270252Sluigi/* Process NETMAP_BDG_ATTACH */ 730262152Sluigistatic int 731270252Sluiginm_bdg_ctl_attach(struct nmreq *nmr) 732262152Sluigi{ 733262152Sluigi struct netmap_adapter *na; 734262152Sluigi int error; 735262152Sluigi 736262152Sluigi NMG_LOCK(); 737262152Sluigi 738262152Sluigi error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */); 739270252Sluigi if (error) /* no device */ 740262152Sluigi goto unlock_exit; 741262152Sluigi 742262152Sluigi if (na == NULL) { /* VALE prefix missing */ 743262152Sluigi error = EINVAL; 744262152Sluigi goto unlock_exit; 745262152Sluigi } 746262152Sluigi 747270252Sluigi if (NETMAP_OWNED_BY_ANY(na)) { 748262152Sluigi error = EBUSY; 749262152Sluigi goto unref_exit; 750262152Sluigi } 751262152Sluigi 752270252Sluigi if (na->nm_bdg_ctl) { 753270252Sluigi /* nop for VALE ports. 
The bwrap needs to put the hwna 754270252Sluigi * in netmap mode (see netmap_bwrap_bdg_ctl) 755270252Sluigi */ 756270252Sluigi error = na->nm_bdg_ctl(na, nmr, 1); 757270252Sluigi if (error) 758270252Sluigi goto unref_exit; 759270252Sluigi ND("registered %s to netmap-mode", na->name); 760262152Sluigi } 761262152Sluigi NMG_UNLOCK(); 762262152Sluigi return 0; 763262152Sluigi 764262152Sluigiunref_exit: 765262152Sluigi netmap_adapter_put(na); 766262152Sluigiunlock_exit: 767262152Sluigi NMG_UNLOCK(); 768262152Sluigi return error; 769262152Sluigi} 770262152Sluigi 771262152Sluigi 772270252Sluigi/* process NETMAP_BDG_DETACH */ 773262152Sluigistatic int 774270252Sluiginm_bdg_ctl_detach(struct nmreq *nmr) 775262152Sluigi{ 776262152Sluigi struct netmap_adapter *na; 777262152Sluigi int error; 778262152Sluigi 779262152Sluigi NMG_LOCK(); 780262152Sluigi error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */); 781262152Sluigi if (error) { /* no device, or another bridge or user owns the device */ 782262152Sluigi goto unlock_exit; 783262152Sluigi } 784262152Sluigi 785262152Sluigi if (na == NULL) { /* VALE prefix missing */ 786262152Sluigi error = EINVAL; 787262152Sluigi goto unlock_exit; 788262152Sluigi } 789262152Sluigi 790270252Sluigi if (na->nm_bdg_ctl) { 791270252Sluigi /* remove the port from bridge. The bwrap 792270252Sluigi * also needs to put the hwna in normal mode 793270252Sluigi */ 794270252Sluigi error = na->nm_bdg_ctl(na, nmr, 0); 795262152Sluigi } 796262152Sluigi 797262152Sluigi netmap_adapter_put(na); 798262152Sluigiunlock_exit: 799262152Sluigi NMG_UNLOCK(); 800262152Sluigi return error; 801262152Sluigi 802262152Sluigi} 803262152Sluigi 804262152Sluigi 805270252Sluigi/* Called by either user's context (netmap_ioctl()) 806270252Sluigi * or external kernel modules (e.g., Openvswitch). 807270252Sluigi * Operation is indicated in nmr->nr_cmd. 
808270252Sluigi * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge 809270252Sluigi * requires bdg_ops argument; the other commands ignore this argument. 810270252Sluigi * 811262152Sluigi * Called without NMG_LOCK. 812262152Sluigi */ 813262152Sluigiint 814270252Sluiginetmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops) 815262152Sluigi{ 816262152Sluigi struct nm_bridge *b; 817262152Sluigi struct netmap_adapter *na; 818262152Sluigi struct netmap_vp_adapter *vpna; 819262152Sluigi char *name = nmr->nr_name; 820262152Sluigi int cmd = nmr->nr_cmd, namelen = strlen(name); 821262152Sluigi int error = 0, i, j; 822262152Sluigi 823262152Sluigi switch (cmd) { 824270252Sluigi case NETMAP_BDG_NEWIF: 825270252Sluigi error = nm_vi_create(nmr); 826270252Sluigi break; 827270252Sluigi 828270252Sluigi case NETMAP_BDG_DELIF: 829270252Sluigi error = nm_vi_destroy(nmr->nr_name); 830270252Sluigi break; 831270252Sluigi 832262152Sluigi case NETMAP_BDG_ATTACH: 833270252Sluigi error = nm_bdg_ctl_attach(nmr); 834262152Sluigi break; 835262152Sluigi 836262152Sluigi case NETMAP_BDG_DETACH: 837270252Sluigi error = nm_bdg_ctl_detach(nmr); 838262152Sluigi break; 839262152Sluigi 840262152Sluigi case NETMAP_BDG_LIST: 841262152Sluigi /* this is used to enumerate bridges and ports */ 842262152Sluigi if (namelen) { /* look up indexes of bridge and port */ 843262152Sluigi if (strncmp(name, NM_NAME, strlen(NM_NAME))) { 844262152Sluigi error = EINVAL; 845262152Sluigi break; 846262152Sluigi } 847262152Sluigi NMG_LOCK(); 848262152Sluigi b = nm_find_bridge(name, 0 /* don't create */); 849262152Sluigi if (!b) { 850262152Sluigi error = ENOENT; 851262152Sluigi NMG_UNLOCK(); 852262152Sluigi break; 853262152Sluigi } 854262152Sluigi 855270252Sluigi name = name + b->bdg_namelen + 1; 856262152Sluigi error = ENOENT; 857262152Sluigi for (j = 0; j < b->bdg_active_ports; j++) { 858262152Sluigi i = b->bdg_port_index[j]; 859262152Sluigi vpna = b->bdg_ports[i]; 860262152Sluigi if (vpna == 
NULL) { 861262152Sluigi D("---AAAAAAAAARGH-------"); 862262152Sluigi continue; 863262152Sluigi } 864262152Sluigi /* the former and the latter identify a 865262152Sluigi * virtual port and a NIC, respectively 866262152Sluigi */ 867270252Sluigi if (!strcmp(vpna->up.name, name)) { 868262152Sluigi /* bridge index */ 869262152Sluigi nmr->nr_arg1 = b - nm_bridges; 870262152Sluigi nmr->nr_arg2 = i; /* port index */ 871262152Sluigi error = 0; 872262152Sluigi break; 873262152Sluigi } 874262152Sluigi } 875262152Sluigi NMG_UNLOCK(); 876262152Sluigi } else { 877262152Sluigi /* return the first non-empty entry starting from 878262152Sluigi * bridge nr_arg1 and port nr_arg2. 879262152Sluigi * 880262152Sluigi * Users can detect the end of the same bridge by 881262152Sluigi * seeing the new and old value of nr_arg1, and can 882262152Sluigi * detect the end of all the bridge by error != 0 883262152Sluigi */ 884262152Sluigi i = nmr->nr_arg1; 885262152Sluigi j = nmr->nr_arg2; 886262152Sluigi 887262152Sluigi NMG_LOCK(); 888262152Sluigi for (error = ENOENT; i < NM_BRIDGES; i++) { 889262152Sluigi b = nm_bridges + i; 890262152Sluigi if (j >= b->bdg_active_ports) { 891262152Sluigi j = 0; /* following bridges scan from 0 */ 892262152Sluigi continue; 893262152Sluigi } 894262152Sluigi nmr->nr_arg1 = i; 895262152Sluigi nmr->nr_arg2 = j; 896262152Sluigi j = b->bdg_port_index[j]; 897262152Sluigi vpna = b->bdg_ports[j]; 898270252Sluigi strncpy(name, vpna->up.name, (size_t)IFNAMSIZ); 899262152Sluigi error = 0; 900262152Sluigi break; 901262152Sluigi } 902262152Sluigi NMG_UNLOCK(); 903262152Sluigi } 904262152Sluigi break; 905262152Sluigi 906270252Sluigi case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */ 907270252Sluigi /* register callbacks to the given bridge. 908262152Sluigi * nmr->nr_name may be just bridge's name (including ':' 909262152Sluigi * if it is not just NM_NAME). 
910262152Sluigi */ 911270252Sluigi if (!bdg_ops) { 912262152Sluigi error = EINVAL; 913262152Sluigi break; 914262152Sluigi } 915262152Sluigi NMG_LOCK(); 916262152Sluigi b = nm_find_bridge(name, 0 /* don't create */); 917262152Sluigi if (!b) { 918262152Sluigi error = EINVAL; 919262152Sluigi } else { 920270252Sluigi b->bdg_ops = *bdg_ops; 921262152Sluigi } 922262152Sluigi NMG_UNLOCK(); 923262152Sluigi break; 924262152Sluigi 925262152Sluigi case NETMAP_BDG_VNET_HDR: 926262152Sluigi /* Valid lengths for the virtio-net header are 0 (no header), 927262152Sluigi 10 and 12. */ 928262152Sluigi if (nmr->nr_arg1 != 0 && 929262152Sluigi nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) && 930262152Sluigi nmr->nr_arg1 != 12) { 931262152Sluigi error = EINVAL; 932262152Sluigi break; 933262152Sluigi } 934262152Sluigi NMG_LOCK(); 935262152Sluigi error = netmap_get_bdg_na(nmr, &na, 0); 936262152Sluigi if (na && !error) { 937262152Sluigi vpna = (struct netmap_vp_adapter *)na; 938262152Sluigi vpna->virt_hdr_len = nmr->nr_arg1; 939262152Sluigi if (vpna->virt_hdr_len) 940270252Sluigi vpna->mfs = NETMAP_BUF_SIZE(na); 941262152Sluigi D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna); 942262152Sluigi netmap_adapter_put(na); 943262152Sluigi } 944262152Sluigi NMG_UNLOCK(); 945262152Sluigi break; 946262152Sluigi 947262152Sluigi default: 948262152Sluigi D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd); 949262152Sluigi error = EINVAL; 950262152Sluigi break; 951262152Sluigi } 952262152Sluigi return error; 953262152Sluigi} 954262152Sluigi 955270252Sluigiint 956270252Sluiginetmap_bdg_config(struct nmreq *nmr) 957270252Sluigi{ 958270252Sluigi struct nm_bridge *b; 959270252Sluigi int error = EINVAL; 960270252Sluigi 961270252Sluigi NMG_LOCK(); 962270252Sluigi b = nm_find_bridge(nmr->nr_name, 0); 963270252Sluigi if (!b) { 964270252Sluigi NMG_UNLOCK(); 965270252Sluigi return error; 966270252Sluigi } 967270252Sluigi NMG_UNLOCK(); 968270252Sluigi /* Don't call config() with NMG_LOCK() held */ 969270252Sluigi 
BDG_RLOCK(b); 970270252Sluigi if (b->bdg_ops.config != NULL) 971270252Sluigi error = b->bdg_ops.config((struct nm_ifreq *)nmr); 972270252Sluigi BDG_RUNLOCK(b); 973270252Sluigi return error; 974270252Sluigi} 975270252Sluigi 976270252Sluigi 977270252Sluigi/* nm_krings_create callback for VALE ports. 978270252Sluigi * Calls the standard netmap_krings_create, then adds leases on rx 979270252Sluigi * rings and bdgfwd on tx rings. 980270252Sluigi */ 981262152Sluigistatic int 982262152Sluiginetmap_vp_krings_create(struct netmap_adapter *na) 983262152Sluigi{ 984262152Sluigi u_int tailroom; 985262152Sluigi int error, i; 986262152Sluigi uint32_t *leases; 987262152Sluigi u_int nrx = netmap_real_rx_rings(na); 988262152Sluigi 989262152Sluigi /* 990262152Sluigi * Leases are attached to RX rings on vale ports 991262152Sluigi */ 992262152Sluigi tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx; 993262152Sluigi 994262152Sluigi error = netmap_krings_create(na, tailroom); 995262152Sluigi if (error) 996262152Sluigi return error; 997262152Sluigi 998262152Sluigi leases = na->tailroom; 999262152Sluigi 1000262152Sluigi for (i = 0; i < nrx; i++) { /* Receive rings */ 1001262152Sluigi na->rx_rings[i].nkr_leases = leases; 1002262152Sluigi leases += na->num_rx_desc; 1003262152Sluigi } 1004262152Sluigi 1005262152Sluigi error = nm_alloc_bdgfwd(na); 1006262152Sluigi if (error) { 1007262152Sluigi netmap_krings_delete(na); 1008262152Sluigi return error; 1009262152Sluigi } 1010262152Sluigi 1011262152Sluigi return 0; 1012262152Sluigi} 1013262152Sluigi 1014262152Sluigi 1015270252Sluigi/* nm_krings_delete callback for VALE ports. 
*/ 1016262152Sluigistatic void 1017262152Sluiginetmap_vp_krings_delete(struct netmap_adapter *na) 1018262152Sluigi{ 1019262152Sluigi nm_free_bdgfwd(na); 1020262152Sluigi netmap_krings_delete(na); 1021262152Sluigi} 1022262152Sluigi 1023262152Sluigi 1024262152Sluigistatic int 1025262152Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, 1026262152Sluigi struct netmap_vp_adapter *na, u_int ring_nr); 1027262152Sluigi 1028262152Sluigi 1029262152Sluigi/* 1030270252Sluigi * main dispatch routine for the bridge. 1031262152Sluigi * Grab packets from a kring, move them into the ft structure 1032262152Sluigi * associated to the tx (input) port. Max one instance per port, 1033262152Sluigi * filtered on input (ioctl, poll or XXX). 1034262152Sluigi * Returns the next position in the ring. 1035262152Sluigi */ 1036262152Sluigistatic int 1037270252Sluiginm_bdg_preflush(struct netmap_kring *kring, u_int end) 1038262152Sluigi{ 1039270252Sluigi struct netmap_vp_adapter *na = 1040270252Sluigi (struct netmap_vp_adapter*)kring->na; 1041262152Sluigi struct netmap_ring *ring = kring->ring; 1042262152Sluigi struct nm_bdg_fwd *ft; 1043270252Sluigi u_int ring_nr = kring->ring_id; 1044262152Sluigi u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1; 1045262152Sluigi u_int ft_i = 0; /* start from 0 */ 1046262152Sluigi u_int frags = 1; /* how many frags ? */ 1047262152Sluigi struct nm_bridge *b = na->na_bdg; 1048262152Sluigi 1049262152Sluigi /* To protect against modifications to the bridge we acquire a 1050262152Sluigi * shared lock, waiting if we can sleep (if the source port is 1051262152Sluigi * attached to a user process) or with a trylock otherwise (NICs). 1052262152Sluigi */ 1053262152Sluigi ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j); 1054262152Sluigi if (na->up.na_flags & NAF_BDG_MAYSLEEP) 1055262152Sluigi BDG_RLOCK(b); 1056262152Sluigi else if (!BDG_RTRYLOCK(b)) 1057262152Sluigi return 0; 1058262152Sluigi ND(5, "rlock acquired for %d packets", ((j > end ? 
lim+1 : 0) + end) - j); 1059262152Sluigi ft = kring->nkr_ft; 1060262152Sluigi 1061262152Sluigi for (; likely(j != end); j = nm_next(j, lim)) { 1062262152Sluigi struct netmap_slot *slot = &ring->slot[j]; 1063262152Sluigi char *buf; 1064262152Sluigi 1065262152Sluigi ft[ft_i].ft_len = slot->len; 1066262152Sluigi ft[ft_i].ft_flags = slot->flags; 1067262152Sluigi 1068262152Sluigi ND("flags is 0x%x", slot->flags); 1069262152Sluigi /* this slot goes into a list so initialize the link field */ 1070262152Sluigi ft[ft_i].ft_next = NM_FT_NULL; 1071262152Sluigi buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ? 1072270252Sluigi (void *)(uintptr_t)slot->ptr : NMB(&na->up, slot); 1073267282Sluigi if (unlikely(buf == NULL)) { 1074267282Sluigi RD(5, "NULL %s buffer pointer from %s slot %d len %d", 1075267282Sluigi (slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT", 1076267282Sluigi kring->name, j, ft[ft_i].ft_len); 1077270252Sluigi buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up); 1078267282Sluigi ft[ft_i].ft_len = 0; 1079267282Sluigi ft[ft_i].ft_flags = 0; 1080267282Sluigi } 1081262152Sluigi __builtin_prefetch(buf); 1082262152Sluigi ++ft_i; 1083262152Sluigi if (slot->flags & NS_MOREFRAG) { 1084262152Sluigi frags++; 1085262152Sluigi continue; 1086262152Sluigi } 1087262152Sluigi if (unlikely(netmap_verbose && frags > 1)) 1088262152Sluigi RD(5, "%d frags at %d", frags, ft_i - frags); 1089262152Sluigi ft[ft_i - frags].ft_frags = frags; 1090262152Sluigi frags = 1; 1091262152Sluigi if (unlikely((int)ft_i >= bridge_batch)) 1092262152Sluigi ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); 1093262152Sluigi } 1094262152Sluigi if (frags > 1) { 1095262152Sluigi D("truncate incomplete fragment at %d (%d frags)", ft_i, frags); 1096262152Sluigi // ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG 1097262152Sluigi ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG; 1098262152Sluigi ft[ft_i - frags].ft_frags = frags - 1; 1099262152Sluigi } 1100262152Sluigi if (ft_i) 1101262152Sluigi ft_i = nm_bdg_flush(ft, ft_i, na, 
ring_nr); 1102262152Sluigi BDG_RUNLOCK(b); 1103262152Sluigi return j; 1104262152Sluigi} 1105262152Sluigi 1106262152Sluigi 1107262152Sluigi/* ----- FreeBSD if_bridge hash function ------- */ 1108262152Sluigi 1109262152Sluigi/* 1110262152Sluigi * The following hash function is adapted from "Hash Functions" by Bob Jenkins 1111262152Sluigi * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). 1112262152Sluigi * 1113262152Sluigi * http://www.burtleburtle.net/bob/hash/spooky.html 1114262152Sluigi */ 1115262152Sluigi#define mix(a, b, c) \ 1116262152Sluigido { \ 1117262152Sluigi a -= b; a -= c; a ^= (c >> 13); \ 1118262152Sluigi b -= c; b -= a; b ^= (a << 8); \ 1119262152Sluigi c -= a; c -= b; c ^= (b >> 13); \ 1120262152Sluigi a -= b; a -= c; a ^= (c >> 12); \ 1121262152Sluigi b -= c; b -= a; b ^= (a << 16); \ 1122262152Sluigi c -= a; c -= b; c ^= (b >> 5); \ 1123262152Sluigi a -= b; a -= c; a ^= (c >> 3); \ 1124262152Sluigi b -= c; b -= a; b ^= (a << 10); \ 1125262152Sluigi c -= a; c -= b; c ^= (b >> 15); \ 1126262152Sluigi} while (/*CONSTCOND*/0) 1127262152Sluigi 1128262152Sluigi 1129262152Sluigistatic __inline uint32_t 1130262152Sluiginm_bridge_rthash(const uint8_t *addr) 1131262152Sluigi{ 1132262152Sluigi uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key 1133262152Sluigi 1134262152Sluigi b += addr[5] << 8; 1135262152Sluigi b += addr[4]; 1136262152Sluigi a += addr[3] << 24; 1137262152Sluigi a += addr[2] << 16; 1138262152Sluigi a += addr[1] << 8; 1139262152Sluigi a += addr[0]; 1140262152Sluigi 1141262152Sluigi mix(a, b, c); 1142262152Sluigi#define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1) 1143262152Sluigi return (c & BRIDGE_RTHASH_MASK); 1144262152Sluigi} 1145262152Sluigi 1146262152Sluigi#undef mix 1147262152Sluigi 1148262152Sluigi 1149270252Sluigi/* nm_register callback for VALE ports */ 1150262152Sluigistatic int 1151270252Sluiginetmap_vp_reg(struct netmap_adapter *na, int onoff) 1152262152Sluigi{ 1153262152Sluigi struct netmap_vp_adapter *vpna = 
1154262152Sluigi (struct netmap_vp_adapter*)na; 1155262152Sluigi 1156270252Sluigi /* persistent ports may be put in netmap mode 1157270252Sluigi * before being attached to a bridge 1158262152Sluigi */ 1159270252Sluigi if (vpna->na_bdg) 1160270252Sluigi BDG_WLOCK(vpna->na_bdg); 1161262152Sluigi if (onoff) { 1162270252Sluigi na->na_flags |= NAF_NETMAP_ON; 1163270252Sluigi /* XXX on FreeBSD, persistent VALE ports should also 1164270252Sluigi * toggle IFCAP_NETMAP in na->ifp (2014-03-16) 1165270252Sluigi */ 1166262152Sluigi } else { 1167270252Sluigi na->na_flags &= ~NAF_NETMAP_ON; 1168262152Sluigi } 1169270252Sluigi if (vpna->na_bdg) 1170270252Sluigi BDG_WUNLOCK(vpna->na_bdg); 1171262152Sluigi return 0; 1172262152Sluigi} 1173262152Sluigi 1174262152Sluigi 1175262152Sluigi/* 1176262152Sluigi * Lookup function for a learning bridge. 1177262152Sluigi * Update the hash table with the source address, 1178262152Sluigi * and then returns the destination port index, and the 1179262152Sluigi * ring in *dst_ring (at the moment, always use ring 0) 1180262152Sluigi */ 1181262152Sluigiu_int 1182270252Sluiginetmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, 1183270252Sluigi const struct netmap_vp_adapter *na) 1184262152Sluigi{ 1185270252Sluigi uint8_t *buf = ft->ft_buf; 1186270252Sluigi u_int buf_len = ft->ft_len; 1187262152Sluigi struct nm_hash_ent *ht = na->na_bdg->ht; 1188262152Sluigi uint32_t sh, dh; 1189262152Sluigi u_int dst, mysrc = na->bdg_port; 1190262152Sluigi uint64_t smac, dmac; 1191262152Sluigi 1192270252Sluigi /* safety check, unfortunately we have many cases */ 1193270252Sluigi if (buf_len >= 14 + na->virt_hdr_len) { 1194270252Sluigi /* virthdr + mac_hdr in the same slot */ 1195270252Sluigi buf += na->virt_hdr_len; 1196270252Sluigi buf_len -= na->virt_hdr_len; 1197270252Sluigi } else if (buf_len == na->virt_hdr_len && ft->ft_flags & NS_MOREFRAG) { 1198270252Sluigi /* only header in first fragment */ 1199270252Sluigi ft++; 1200270252Sluigi buf = ft->ft_buf; 
1201270252Sluigi buf_len = ft->ft_len; 1202270252Sluigi } else { 1203270252Sluigi RD(5, "invalid buf format, length %d", buf_len); 1204262152Sluigi return NM_BDG_NOPORT; 1205262152Sluigi } 1206262152Sluigi dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff; 1207262152Sluigi smac = le64toh(*(uint64_t *)(buf + 4)); 1208262152Sluigi smac >>= 16; 1209262152Sluigi 1210262152Sluigi /* 1211262152Sluigi * The hash is somewhat expensive, there might be some 1212262152Sluigi * worthwhile optimizations here. 1213262152Sluigi */ 1214262152Sluigi if ((buf[6] & 1) == 0) { /* valid src */ 1215262152Sluigi uint8_t *s = buf+6; 1216262152Sluigi sh = nm_bridge_rthash(s); // XXX hash of source 1217262152Sluigi /* update source port forwarding entry */ 1218262152Sluigi ht[sh].mac = smac; /* XXX expire ? */ 1219262152Sluigi ht[sh].ports = mysrc; 1220262152Sluigi if (netmap_verbose) 1221262152Sluigi D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d", 1222262152Sluigi s[0], s[1], s[2], s[3], s[4], s[5], mysrc); 1223262152Sluigi } 1224262152Sluigi dst = NM_BDG_BROADCAST; 1225262152Sluigi if ((buf[0] & 1) == 0) { /* unicast */ 1226262152Sluigi dh = nm_bridge_rthash(buf); // XXX hash of dst 1227262152Sluigi if (ht[dh].mac == dmac) { /* found dst */ 1228262152Sluigi dst = ht[dh].ports; 1229262152Sluigi } 1230262152Sluigi /* XXX otherwise return NM_BDG_UNKNOWN ? */ 1231262152Sluigi } 1232262152Sluigi *dst_ring = 0; 1233262152Sluigi return dst; 1234262152Sluigi} 1235262152Sluigi 1236262152Sluigi 1237262152Sluigi/* 1238262152Sluigi * Available space in the ring. 
Only used in VALE code 1239262152Sluigi * and only with is_rx = 1 1240262152Sluigi */ 1241262152Sluigistatic inline uint32_t 1242262152Sluiginm_kr_space(struct netmap_kring *k, int is_rx) 1243262152Sluigi{ 1244262152Sluigi int space; 1245262152Sluigi 1246262152Sluigi if (is_rx) { 1247262152Sluigi int busy = k->nkr_hwlease - k->nr_hwcur; 1248262152Sluigi if (busy < 0) 1249262152Sluigi busy += k->nkr_num_slots; 1250262152Sluigi space = k->nkr_num_slots - 1 - busy; 1251262152Sluigi } else { 1252262152Sluigi /* XXX never used in this branch */ 1253262152Sluigi space = k->nr_hwtail - k->nkr_hwlease; 1254262152Sluigi if (space < 0) 1255262152Sluigi space += k->nkr_num_slots; 1256262152Sluigi } 1257262152Sluigi#if 0 1258262152Sluigi // sanity check 1259262152Sluigi if (k->nkr_hwlease >= k->nkr_num_slots || 1260262152Sluigi k->nr_hwcur >= k->nkr_num_slots || 1261262152Sluigi k->nr_tail >= k->nkr_num_slots || 1262262152Sluigi busy < 0 || 1263262152Sluigi busy >= k->nkr_num_slots) { 1264262152Sluigi D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 1265262152Sluigi k->nkr_lease_idx, k->nkr_num_slots); 1266262152Sluigi } 1267262152Sluigi#endif 1268262152Sluigi return space; 1269262152Sluigi} 1270262152Sluigi 1271262152Sluigi 1272262152Sluigi 1273262152Sluigi 1274262152Sluigi/* make a lease on the kring for N positions. 
return the 1275262152Sluigi * lease index 1276262152Sluigi * XXX only used in VALE code and with is_rx = 1 1277262152Sluigi */ 1278262152Sluigistatic inline uint32_t 1279262152Sluiginm_kr_lease(struct netmap_kring *k, u_int n, int is_rx) 1280262152Sluigi{ 1281262152Sluigi uint32_t lim = k->nkr_num_slots - 1; 1282262152Sluigi uint32_t lease_idx = k->nkr_lease_idx; 1283262152Sluigi 1284262152Sluigi k->nkr_leases[lease_idx] = NR_NOSLOT; 1285262152Sluigi k->nkr_lease_idx = nm_next(lease_idx, lim); 1286262152Sluigi 1287262152Sluigi if (n > nm_kr_space(k, is_rx)) { 1288262152Sluigi D("invalid request for %d slots", n); 1289262152Sluigi panic("x"); 1290262152Sluigi } 1291262152Sluigi /* XXX verify that there are n slots */ 1292262152Sluigi k->nkr_hwlease += n; 1293262152Sluigi if (k->nkr_hwlease > lim) 1294262152Sluigi k->nkr_hwlease -= lim + 1; 1295262152Sluigi 1296262152Sluigi if (k->nkr_hwlease >= k->nkr_num_slots || 1297262152Sluigi k->nr_hwcur >= k->nkr_num_slots || 1298262152Sluigi k->nr_hwtail >= k->nkr_num_slots || 1299262152Sluigi k->nkr_lease_idx >= k->nkr_num_slots) { 1300262152Sluigi D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d", 1301270252Sluigi k->na->name, 1302262152Sluigi k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 1303262152Sluigi k->nkr_lease_idx, k->nkr_num_slots); 1304262152Sluigi } 1305262152Sluigi return lease_idx; 1306262152Sluigi} 1307262152Sluigi 1308262152Sluigi/* 1309270252Sluigi * 1310262152Sluigi * This flush routine supports only unicast and broadcast but a large 1311262152Sluigi * number of ports, and lets us replace the learn and dispatch functions. 
*/
/*
 * Forward the n entries of the work area ft, collected from ring ring_nr
 * of source port na, to their destination ports.
 *
 * Pass 1 asks the bridge lookup callback for the destination of each
 * packet and links the packet into a per-(port, ring) queue.
 * Pass 2 visits every destination that has traffic (plus every other
 * active port when there is broadcast traffic), leases slots on the
 * destination rx ring, copies the packets with the ring lock released,
 * and finally reports completion and notifies the destination.
 *
 * All the queues used are reset before returning. Always returns 0.
 */
int
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
		u_int ring_nr)
{
	struct nm_bdg_q *dst_ents, *brddst;
	uint16_t num_dsts = 0, *dsts;
	struct nm_bridge *b = na->na_bdg;
	u_int i, j, me = na->bdg_port;

	/*
	 * The work area (pointed by ft) is followed by an array of
	 * queues, dst_ents; there are NM_BDG_MAXRINGS
	 * queues per port plus one for the broadcast traffic.
	 * Then we have an array of destination indexes.
	 */
	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);

	/* first pass: find a destination for each packet in the batch.
	 * Only the first fragment of each packet is visited: the index
	 * advances by the fragment count stored there.
	 */
	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
		uint16_t dst_port, d_i;
		struct nm_bdg_q *d;

		ND("slot %d frags %d", i, ft[i].ft_frags);
		/* Drop the packet if the virtio-net header is not into the first
		   fragment nor at the very beginning of the second. */
		if (unlikely(na->virt_hdr_len > ft[i].ft_len))
			continue;
		dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
		if (netmap_verbose > 255)
			RD(5, "slot %d port %d -> %d", i, me, dst_port);
		if (dst_port == NM_BDG_NOPORT)
			continue; /* this packet is identified to be dropped */
		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
			continue; /* out of range value from the lookup */
		else if (dst_port == NM_BDG_BROADCAST)
			dst_ring = 0; /* broadcasts always go to ring 0 */
		else if (unlikely(dst_port == me ||
		    !b->bdg_ports[dst_port]))
			continue; /* back to the sender, or no such port */

		/* get a position in the scratch pad */
		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
		d = dst_ents + d_i;

		/* append the first fragment to the list */
		if (d->bq_head == NM_FT_NULL) { /* new destination */
			d->bq_head = d->bq_tail = i;
			/* remember this position to be scanned later */
			if (dst_port != NM_BDG_BROADCAST)
				dsts[num_dsts++] = d_i;
		} else {
			ft[d->bq_tail].ft_next = i;
			d->bq_tail = i;
		}
		/* the queue length is counted in slots, not in packets */
		d->bq_len += ft[i].ft_frags;
	}

	/*
	 * Broadcast traffic goes to ring 0 on all destinations.
	 * So we need to add these rings to the list of ports to scan.
	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
	 * expensive. We should keep a compact list of active destinations
	 * so we could shorten this loop.
	 */
	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
	if (brddst->bq_head != NM_FT_NULL) {
		for (j = 0; likely(j < b->bdg_active_ports); j++) {
			uint16_t d_i;
			i = b->bdg_port_index[j];
			if (unlikely(i == me))
				continue;
			d_i = i * NM_BDG_MAXRINGS;
			/* ring 0 of this port is listed already if it
			 * has unicast traffic queued
			 */
			if (dst_ents[d_i].bq_head == NM_FT_NULL)
				dsts[num_dsts++] = d_i;
		}
	}

	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
	/* second pass: scan destinations */
	for (i = 0; i < num_dsts; i++) {
		struct netmap_vp_adapter *dst_na;
		struct netmap_kring *kring;
		struct netmap_ring *ring;
		u_int dst_nr, lim, j, d_i, next, brd_next;
		u_int needed, howmany;
		int retry = netmap_txsync_retry;
		struct nm_bdg_q *d;
		uint32_t my_start = 0, lease_idx = 0;
		int nrings;
		int virt_hdr_mismatch = 0;

		d_i = dsts[i];
		ND("second pass %d port %d", i, d_i);
		d = dst_ents + d_i;
		// XXX fix the division
		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
		/* protect from the lookup function returning an inactive
		 * destination port
		 */
		if (unlikely(dst_na == NULL))
			goto cleanup;
		if (dst_na->up.na_flags & NAF_SW_ONLY)
			goto cleanup;
		/*
		 * The interface may be in !netmap mode in two cases:
		 * - when na is attached but not activated yet;
		 * - when na is being deactivated but is still attached.
		 */
		if (unlikely(!nm_netmap_on(&dst_na->up))) {
			ND("not in netmap mode!");
			goto cleanup;
		}

		/* there is at least one either unicast or broadcast packet */
		brd_next = brddst->bq_head;
		next = d->bq_head;
		/* we need to reserve this many slots. If fewer are
		 * available, some packets will be dropped.
		 * Packets may have multiple fragments, so there is a
		 * chance that we may not use all of the slots
		 * we have claimed, so we will need to handle the leftover
		 * ones when we regain the lock.
		 */
		needed = d->bq_len + brddst->bq_len;

		if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
			RD(3, "virt_hdr_mismatch, src %d dst %d", na->virt_hdr_len, dst_na->virt_hdr_len);
			/* There is a virtio-net header/offloadings mismatch between
			 * source and destination. The slower mismatch datapath will
			 * be used to cope with all the mismatches.
			 */
			virt_hdr_mismatch = 1;
			if (dst_na->mfs < na->mfs) {
				/* We may need to do segmentation offloadings, and so
				 * we may need a number of destination slots greater
				 * than the number of input slots ('needed').
				 * We look for the smallest integer 'x' which satisfies:
				 *	needed * na->mfs + x * H <= x * dst_na->mfs
				 * where 'H' is the length of the longest header that may
				 * be replicated in the segmentation process (e.g. for
				 * TCPv4 we must account for ethernet header, IP header
				 * and TCPv4 header).
				 */
				needed = (needed * na->mfs) /
						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
			}
		}

		ND(5, "pass 2 dst %d is %x %s",
			i, d_i, is_vp ? "virtual" : "nic/host");
		/* the low bits of d_i select the ring; fold the value
		 * onto the rings the destination actually has
		 */
		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
		nrings = dst_na->up.num_rx_rings;
		if (dst_nr >= nrings)
			dst_nr = dst_nr % nrings;
		kring = &dst_na->up.rx_rings[dst_nr];
		ring = kring->ring;
		lim = kring->nkr_num_slots - 1;

retry:

		if (dst_na->retry && retry) {
			/* try to get some free slot from the previous run */
			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
			/* actually useful only for bwraps, since there
			 * the notify will trigger a txsync on the hwna. VALE ports
			 * have dst_na->retry == 0
			 */
		}
		/* reserve the buffers in the queue and an entry
		 * to report completion, and drop lock.
		 * XXX this might become a helper function.
		 */
		mtx_lock(&kring->q_lock);
		if (kring->nkr_stopped) {
			mtx_unlock(&kring->q_lock);
			goto cleanup;
		}
		/* j is the first slot of our lease, my_start remembers it */
		my_start = j = kring->nkr_hwlease;
		howmany = nm_kr_space(kring, 1);
		if (needed < howmany)
			howmany = needed;
		lease_idx = nm_kr_lease(kring, howmany, 1);
		mtx_unlock(&kring->q_lock);

		/* only retry if we need more than available slots */
		if (retry && needed <= howmany)
			retry = 0;

		/* copy to the destination queue */
		while (howmany > 0) {
			struct netmap_slot *slot;
			struct nm_bdg_fwd *ft_p, *ft_end;
			u_int cnt;

			/* find the queue from which we pick next packet.
			 * NM_FT_NULL is always higher than valid indexes
			 * so we never dereference it if the other list
			 * has packets (and if both are empty we never
			 * get here).
			 */
			if (next < brd_next) {
				ft_p = ft + next;
				next = ft_p->ft_next;
			} else { /* insert broadcast */
				ft_p = ft + brd_next;
				brd_next = ft_p->ft_next;
			}
			cnt = ft_p->ft_frags; // cnt > 0
			if (unlikely(cnt > howmany))
				break; /* no more space */
			if (netmap_verbose && cnt > 1)
				RD(5, "rx %d frags to %d", cnt, j);
			ft_end = ft_p + cnt;
			if (unlikely(virt_hdr_mismatch)) {
				/* the helper receives j and howmany by
				 * reference
				 */
				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
			} else {
				howmany -= cnt;
				/* one destination slot per source fragment */
				do {
					char *dst, *src = ft_p->ft_buf;
					size_t copy_len = ft_p->ft_len, dst_len = copy_len;

					slot = &ring->slot[j];
					dst = NMB(&dst_na->up, slot);

					ND("send [%d] %d(%d) bytes at %s:%d",
						i, (int)copy_len, (int)dst_len,
						NM_IFPNAME(dst_ifp), j);
					/* round to a multiple of 64 */
					copy_len = (copy_len + 63) & ~63;

					/* the rounded length must fit in the
					 * buffers of both ports
					 */
					if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
						     copy_len > NETMAP_BUF_SIZE(&na->up))) {
						RD(5, "invalid len %d, down to 64", (int)copy_len);
						copy_len = dst_len = 64; // XXX
					}
					if (ft_p->ft_flags & NS_INDIRECT) {
						if (copyin(src, dst, copy_len)) {
							// invalid user pointer, pretend len is 0
							dst_len = 0;
						}
					} else {
						//memcpy(dst, src, copy_len);
						pkt_copy(src, dst, (int)copy_len);
					}
					slot->len = dst_len;
					/* fragment count goes in the bits
					 * above the low 8 of the flags
					 */
					slot->flags = (cnt << 8)| NS_MOREFRAG;
					j = nm_next(j, lim);
					needed--;
					ft_p++;
				} while (ft_p != ft_end);
				slot->flags = (cnt << 8); /* clear flag on last entry */
			}
			/* are we done ? */
			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
				break;
		}
		{
			/* current position */
			uint32_t *p = kring->nkr_leases; /* shorthand */
			uint32_t update_pos;
			int still_locked = 1;

			mtx_lock(&kring->q_lock);
			if (unlikely(howmany > 0)) {
				/* not used all bufs. If i am the last one
				 * i can recover the slots, otherwise must
				 * fill them with 0 to mark empty packets.
				 */
				ND("leftover %d bufs", howmany);
				if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
				    /* yes i am the last one */
				    ND("roll back nkr_hwlease to %d", j);
				    kring->nkr_hwlease = j;
				} else {
				    while (howmany-- > 0) {
					ring->slot[j].len = 0;
					ring->slot[j].flags = 0;
					j = nm_next(j, lim);
				    }
				}
			}
			p[lease_idx] = j; /* report I am done */

			update_pos = kring->nr_hwtail;

			if (my_start == update_pos) {
				/* all slots before my_start have been reported,
				 * so scan subsequent leases to see if other ranges
				 * have been completed, and do a selwakeup or txsync.
				 */
				while (lease_idx != kring->nkr_lease_idx &&
					p[lease_idx] != NR_NOSLOT) {
					j = p[lease_idx];
					p[lease_idx] = NR_NOSLOT;
					lease_idx = nm_next(lease_idx, lim);
				}
				/* j is the new 'write' position. j != my_start
				 * means there are new buffers to report
				 */
				if (likely(j != my_start)) {
					kring->nr_hwtail = j;
					/* notify with the ring lock released */
					still_locked = 0;
					mtx_unlock(&kring->q_lock);
					dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
					/* this is netmap_notify for VALE ports and
					 * netmap_bwrap_notify for bwrap. The latter will
					 * trigger a txsync on the underlying hwna
					 */
					if (dst_na->retry && retry--) {
						/* XXX this is going to call nm_notify again.
						 * Only useful for bwrap in virtual machines
						 */
						goto retry;
					}
				}
			}
			if (still_locked)
				mtx_unlock(&kring->q_lock);
		}
cleanup:
		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
		d->bq_len = 0;
	}
	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
	brddst->bq_len = 0;
	return 0;
}

/* nm_txsync callback for VALE ports.
 *
 * Hands the slots up to kring->rcur to nm_bdg_preflush() and records
 * the position it returns as the new nr_hwcur; nr_hwtail is set to the
 * slot right before it. When bridge_batch is not positive, or the port
 * is not attached to a bridge, the slots are marked as consumed without
 * being forwarded. Always returns 0.
 */
static int
netmap_vp_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_vp_adapter *na =
		(struct netmap_vp_adapter *)kring->na;
	u_int done;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const cur = kring->rcur;

	if (bridge_batch <= 0) { /* testing only */
		done = cur; // used all
		goto done;
	}
	if (!na->na_bdg) {
		/* not attached to any bridge: nowhere to send */
		done = cur;
		goto done;
	}
	/* clamp the batch size to NM_BDG_BATCH */
	if (bridge_batch > NM_BDG_BATCH)
		bridge_batch = NM_BDG_BATCH;

	done = nm_bdg_preflush(kring, cur);
done:
	if (done != cur)
		D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail);
	/*
	 * packets between 'done' and 'cur' are left unsent.
	 */
	kring->nr_hwcur = done;
	kring->nr_hwtail = nm_prev(done, lim);
	nm_txsync_finalize(kring);
	if (netmap_verbose)
		D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
	return 0;
}


/* rxsync code used by VALE ports nm_rxsync callback and also
 * internally by the brwap
 */
static int
netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, lim = kring->nkr_num_slots - 1;
	u_int head = nm_rxsync_prologue(kring);
	int n;

	if (head > lim) {
		D("ouch dangerous reset!!!");
		n = netmap_ring_reinit(kring);
		goto done;
	}

	/* First part, import newly received packets. */
	/* actually nothing to do here, they are already in the kring */

	/* Second part, skip past packets that userspace has released.
*/ 1703262152Sluigi nm_i = kring->nr_hwcur; 1704262152Sluigi if (nm_i != head) { 1705262152Sluigi /* consistency check, but nothing really important here */ 1706262152Sluigi for (n = 0; likely(nm_i != head); n++) { 1707262152Sluigi struct netmap_slot *slot = &ring->slot[nm_i]; 1708270252Sluigi void *addr = NMB(na, slot); 1709262152Sluigi 1710270252Sluigi if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */ 1711262152Sluigi D("bad buffer index %d, ignore ?", 1712262152Sluigi slot->buf_idx); 1713262152Sluigi } 1714262152Sluigi slot->flags &= ~NS_BUF_CHANGED; 1715262152Sluigi nm_i = nm_next(nm_i, lim); 1716262152Sluigi } 1717262152Sluigi kring->nr_hwcur = head; 1718262152Sluigi } 1719262152Sluigi 1720262152Sluigi /* tell userspace that there are new packets */ 1721262152Sluigi nm_rxsync_finalize(kring); 1722262152Sluigi n = 0; 1723262152Sluigidone: 1724262152Sluigi return n; 1725262152Sluigi} 1726262152Sluigi 1727262152Sluigi/* 1728270252Sluigi * nm_rxsync callback for VALE ports 1729262152Sluigi * user process reading from a VALE switch. 1730262152Sluigi * Already protected against concurrent calls from userspace, 1731262152Sluigi * but we must acquire the queue's lock to protect against 1732262152Sluigi * writers on the same queue. 1733262152Sluigi */ 1734262152Sluigistatic int 1735270252Sluiginetmap_vp_rxsync(struct netmap_kring *kring, int flags) 1736262152Sluigi{ 1737262152Sluigi int n; 1738262152Sluigi 1739262152Sluigi mtx_lock(&kring->q_lock); 1740270252Sluigi n = netmap_vp_rxsync_locked(kring, flags); 1741262152Sluigi mtx_unlock(&kring->q_lock); 1742262152Sluigi return n; 1743262152Sluigi} 1744262152Sluigi 1745262152Sluigi 1746270252Sluigi/* nm_bdg_attach callback for VALE ports 1747270252Sluigi * The na_vp port is this same netmap_adapter. There is no host port. 
1748270252Sluigi */ 1749262152Sluigistatic int 1750270252Sluiginetmap_vp_bdg_attach(const char *name, struct netmap_adapter *na) 1751262152Sluigi{ 1752270252Sluigi struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; 1753270252Sluigi 1754270252Sluigi if (vpna->na_bdg) 1755270252Sluigi return EBUSY; 1756270252Sluigi na->na_vp = vpna; 1757270252Sluigi strncpy(na->name, name, sizeof(na->name)); 1758270252Sluigi na->na_hostvp = NULL; 1759270252Sluigi return 0; 1760270252Sluigi} 1761270252Sluigi 1762270252Sluigi/* create a netmap_vp_adapter that describes a VALE port. 1763270252Sluigi * Only persistent VALE ports have a non-null ifp. 1764270252Sluigi */ 1765270252Sluigistatic int 1766270252Sluiginetmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter **ret) 1767270252Sluigi{ 1768262152Sluigi struct netmap_vp_adapter *vpna; 1769262152Sluigi struct netmap_adapter *na; 1770262152Sluigi int error; 1771262152Sluigi u_int npipes = 0; 1772262152Sluigi 1773262152Sluigi vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO); 1774262152Sluigi if (vpna == NULL) 1775262152Sluigi return ENOMEM; 1776262152Sluigi 1777262152Sluigi na = &vpna->up; 1778262152Sluigi 1779262152Sluigi na->ifp = ifp; 1780270252Sluigi strncpy(na->name, nmr->nr_name, sizeof(na->name)); 1781262152Sluigi 1782262152Sluigi /* bound checking */ 1783262152Sluigi na->num_tx_rings = nmr->nr_tx_rings; 1784262152Sluigi nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 1785262152Sluigi nmr->nr_tx_rings = na->num_tx_rings; // write back 1786262152Sluigi na->num_rx_rings = nmr->nr_rx_rings; 1787262152Sluigi nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 1788262152Sluigi nmr->nr_rx_rings = na->num_rx_rings; // write back 1789262152Sluigi nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE, 1790262152Sluigi 1, NM_BDG_MAXSLOTS, NULL); 1791262152Sluigi na->num_tx_desc = nmr->nr_tx_slots; 1792262152Sluigi nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE, 
1793262152Sluigi 1, NM_BDG_MAXSLOTS, NULL); 1794262152Sluigi /* validate number of pipes. We want at least 1, 1795262152Sluigi * but probably can do with some more. 1796262152Sluigi * So let's use 2 as default (when 0 is supplied) 1797262152Sluigi */ 1798262152Sluigi npipes = nmr->nr_arg1; 1799262152Sluigi nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL); 1800262152Sluigi nmr->nr_arg1 = npipes; /* write back */ 1801262152Sluigi /* validate extra bufs */ 1802262152Sluigi nm_bound_var(&nmr->nr_arg3, 0, 0, 1803262152Sluigi 128*NM_BDG_MAXSLOTS, NULL); 1804262152Sluigi na->num_rx_desc = nmr->nr_rx_slots; 1805262152Sluigi vpna->virt_hdr_len = 0; 1806262152Sluigi vpna->mfs = 1514; 1807262152Sluigi /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero?? 1808262152Sluigi vpna->mfs = netmap_buf_size; */ 1809262152Sluigi if (netmap_verbose) 1810262152Sluigi D("max frame size %u", vpna->mfs); 1811262152Sluigi 1812262152Sluigi na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER; 1813270252Sluigi na->nm_txsync = netmap_vp_txsync; 1814270252Sluigi na->nm_rxsync = netmap_vp_rxsync; 1815270252Sluigi na->nm_register = netmap_vp_reg; 1816262152Sluigi na->nm_krings_create = netmap_vp_krings_create; 1817262152Sluigi na->nm_krings_delete = netmap_vp_krings_delete; 1818270252Sluigi na->nm_dtor = netmap_vp_dtor; 1819270252Sluigi na->nm_mem = netmap_mem_private_new(na->name, 1820262152Sluigi na->num_tx_rings, na->num_tx_desc, 1821262152Sluigi na->num_rx_rings, na->num_rx_desc, 1822262152Sluigi nmr->nr_arg3, npipes, &error); 1823262152Sluigi if (na->nm_mem == NULL) 1824262152Sluigi goto err; 1825270252Sluigi na->nm_bdg_attach = netmap_vp_bdg_attach; 1826262152Sluigi /* other nmd fields are set in the common routine */ 1827262152Sluigi error = netmap_attach_common(na); 1828262152Sluigi if (error) 1829262152Sluigi goto err; 1830270252Sluigi *ret = vpna; 1831262152Sluigi return 0; 1832262152Sluigi 1833262152Sluigierr: 1834262152Sluigi if (na->nm_mem != NULL) 1835262152Sluigi 
netmap_mem_private_delete(na->nm_mem); 1836262152Sluigi free(vpna, M_DEVBUF); 1837262152Sluigi return error; 1838262152Sluigi} 1839262152Sluigi 1840270252Sluigi/* Bridge wrapper code (bwrap). 1841270252Sluigi * This is used to connect a non-VALE-port netmap_adapter (hwna) to a 1842270252Sluigi * VALE switch. 1843270252Sluigi * The main task is to swap the meaning of tx and rx rings to match the 1844270252Sluigi * expectations of the VALE switch code (see nm_bdg_flush). 1845270252Sluigi * 1846270252Sluigi * The bwrap works by interposing a netmap_bwrap_adapter between the 1847270252Sluigi * rest of the system and the hwna. The netmap_bwrap_adapter looks like 1848270252Sluigi * a netmap_vp_adapter to the rest the system, but, internally, it 1849270252Sluigi * translates all callbacks to what the hwna expects. 1850270252Sluigi * 1851270252Sluigi * Note that we have to intercept callbacks coming from two sides: 1852270252Sluigi * 1853270252Sluigi * - callbacks coming from the netmap module are intercepted by 1854270252Sluigi * passing around the netmap_bwrap_adapter instead of the hwna 1855270252Sluigi * 1856270252Sluigi * - callbacks coming from outside of the netmap module only know 1857270252Sluigi * about the hwna. This, however, only happens in interrupt 1858270252Sluigi * handlers, where only the hwna->nm_notify callback is called. 1859270252Sluigi * What the bwrap does is to overwrite the hwna->nm_notify callback 1860270252Sluigi * with its own netmap_bwrap_intr_notify. 1861270252Sluigi * XXX This assumes that the hwna->nm_notify callback was the 1862270252Sluigi * standard netmap_notify(), as it is the case for nic adapters. 1863270252Sluigi * Any additional action performed by hwna->nm_notify will not be 1864270252Sluigi * performed by netmap_bwrap_intr_notify. 1865270252Sluigi * 1866270252Sluigi * Additionally, the bwrap can optionally attach the host rings pair 1867270252Sluigi * of the wrapped adapter to a different port of the switch. 
1868270252Sluigi */ 1869262152Sluigi 1870270252Sluigi 1871262152Sluigistatic void 1872262152Sluiginetmap_bwrap_dtor(struct netmap_adapter *na) 1873262152Sluigi{ 1874262152Sluigi struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; 1875262152Sluigi struct netmap_adapter *hwna = bna->hwna; 1876262152Sluigi 1877262152Sluigi ND("na %p", na); 1878270252Sluigi /* drop reference to hwna->ifp. 1879270252Sluigi * If we don't do this, netmap_detach_common(na) 1880270252Sluigi * will think it has set NA(na->ifp) to NULL 1881270252Sluigi */ 1882270252Sluigi na->ifp = NULL; 1883270252Sluigi /* for safety, also drop the possible reference 1884270252Sluigi * in the hostna 1885270252Sluigi */ 1886270252Sluigi bna->host.up.ifp = NULL; 1887262152Sluigi 1888270252Sluigi hwna->nm_mem = bna->save_nmd; 1889262152Sluigi hwna->na_private = NULL; 1890270252Sluigi hwna->na_vp = hwna->na_hostvp = NULL; 1891270252Sluigi hwna->na_flags &= ~NAF_BUSY; 1892262152Sluigi netmap_adapter_put(hwna); 1893262152Sluigi 1894262152Sluigi} 1895262152Sluigi 1896262152Sluigi 1897262152Sluigi/* 1898262152Sluigi * Intr callback for NICs connected to a bridge. 1899262152Sluigi * Simply ignore tx interrupts (maybe we could try to recover space ?) 1900262152Sluigi * and pass received packets from nic to the bridge. 1901262152Sluigi * 1902262152Sluigi * XXX TODO check locking: this is called from the interrupt 1903262152Sluigi * handler so we should make sure that the interface is not 1904262152Sluigi * disconnected while passing down an interrupt. 1905262152Sluigi * 1906262152Sluigi * Note, no user process can access this NIC or the host stack. 1907262152Sluigi * The only part of the ring that is significant are the slots, 1908262152Sluigi * and head/cur/tail are set from the kring as needed 1909262152Sluigi * (part as a receive ring, part as a transmit ring). 1910262152Sluigi * 1911262152Sluigi * callback that overwrites the hwna notify callback. 
1912262152Sluigi * Packets come from the outside or from the host stack and are put on an hwna rx ring. 1913262152Sluigi * The bridge wrapper then sends the packets through the bridge. 1914262152Sluigi */ 1915262152Sluigistatic int 1916262152Sluiginetmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags) 1917262152Sluigi{ 1918262152Sluigi struct netmap_bwrap_adapter *bna = na->na_private; 1919262152Sluigi struct netmap_vp_adapter *hostna = &bna->host; 1920262152Sluigi struct netmap_kring *kring, *bkring; 1921262152Sluigi struct netmap_ring *ring; 1922262152Sluigi int is_host_ring = ring_nr == na->num_rx_rings; 1923262152Sluigi struct netmap_vp_adapter *vpna = &bna->up; 1924262152Sluigi int error = 0; 1925262152Sluigi 1926262152Sluigi if (netmap_verbose) 1927270252Sluigi D("%s %s%d 0x%x", na->name, 1928262152Sluigi (tx == NR_TX ? "TX" : "RX"), ring_nr, flags); 1929262152Sluigi 1930262152Sluigi if (flags & NAF_DISABLE_NOTIFY) { 1931270252Sluigi /* the enabled/disabled state of the ring has changed, 1932270252Sluigi * propagate the info to the wrapper (with tx/rx swapped) 1933270252Sluigi */ 1934270252Sluigi if (tx == NR_TX) { 1935270252Sluigi netmap_set_rxring(&vpna->up, ring_nr, 1936270252Sluigi na->tx_rings[ring_nr].nkr_stopped); 1937270252Sluigi } else { 1938270252Sluigi netmap_set_txring(&vpna->up, ring_nr, 1939270252Sluigi na->rx_rings[ring_nr].nkr_stopped); 1940270252Sluigi } 1941262152Sluigi return 0; 1942262152Sluigi } 1943262152Sluigi 1944270252Sluigi if (!nm_netmap_on(na)) 1945262152Sluigi return 0; 1946262152Sluigi 1947262152Sluigi /* we only care about receive interrupts */ 1948262152Sluigi if (tx == NR_TX) 1949262152Sluigi return 0; 1950262152Sluigi 1951262152Sluigi kring = &na->rx_rings[ring_nr]; 1952262152Sluigi ring = kring->ring; 1953262152Sluigi 1954262152Sluigi /* make sure the ring is not disabled */ 1955262152Sluigi if (nm_kr_tryget(kring)) 1956262152Sluigi return 0; 1957262152Sluigi 1958262152Sluigi if 
(is_host_ring && hostna->na_bdg == NULL) { 1959262152Sluigi error = bna->save_notify(na, ring_nr, tx, flags); 1960262152Sluigi goto put_out; 1961262152Sluigi } 1962262152Sluigi 1963262152Sluigi /* Here we expect ring->head = ring->cur = ring->tail 1964262152Sluigi * because everything has been released from the previous round. 1965262152Sluigi * However the ring is shared and we might have info from 1966262152Sluigi * the wrong side (the tx ring). Hence we overwrite with 1967262152Sluigi * the info from the rx kring. 1968262152Sluigi */ 1969262152Sluigi if (netmap_verbose) 1970270252Sluigi D("%s head %d cur %d tail %d (kring %d %d %d)", na->name, 1971262152Sluigi ring->head, ring->cur, ring->tail, 1972262152Sluigi kring->rhead, kring->rcur, kring->rtail); 1973262152Sluigi 1974262152Sluigi ring->head = kring->rhead; 1975262152Sluigi ring->cur = kring->rcur; 1976262152Sluigi ring->tail = kring->rtail; 1977262152Sluigi 1978262152Sluigi if (is_host_ring) { 1979262152Sluigi vpna = hostna; 1980262152Sluigi ring_nr = 0; 1981267282Sluigi } 1982262152Sluigi /* simulate a user wakeup on the rx ring */ 1983262152Sluigi /* fetch packets that have arrived. 1984262152Sluigi * XXX maybe do this in a loop ? 1985262152Sluigi */ 1986262152Sluigi error = kring->nm_sync(kring, 0); 1987262152Sluigi if (error) 1988262152Sluigi goto put_out; 1989262152Sluigi if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) { 1990262152Sluigi D("how strange, interrupt with no packets on %s", 1991270252Sluigi na->name); 1992262152Sluigi goto put_out; 1993262152Sluigi } 1994262152Sluigi 1995262152Sluigi /* new packets are ring->cur to ring->tail, and the bkring 1996262152Sluigi * had hwcur == ring->cur. So advance ring->cur to ring->tail 1997262152Sluigi * to push all packets out. 
1998262152Sluigi */ 1999262152Sluigi ring->head = ring->cur = ring->tail; 2000262152Sluigi 2001262152Sluigi /* also set tail to what the bwrap expects */ 2002262152Sluigi bkring = &vpna->up.tx_rings[ring_nr]; 2003262152Sluigi ring->tail = bkring->nr_hwtail; // rtail too ? 2004262152Sluigi 2005262152Sluigi /* pass packets to the switch */ 2006262152Sluigi nm_txsync_prologue(bkring); // XXX error checking ? 2007270252Sluigi netmap_vp_txsync(bkring, flags); 2008262152Sluigi 2009262152Sluigi /* mark all buffers as released on this ring */ 2010262152Sluigi ring->head = ring->cur = kring->nr_hwtail; 2011262152Sluigi ring->tail = kring->rtail; 2012262152Sluigi /* another call to actually release the buffers */ 2013262152Sluigi if (!is_host_ring) { 2014262152Sluigi error = kring->nm_sync(kring, 0); 2015262152Sluigi } else { 2016262152Sluigi /* mark all packets as released, as in the 2017262152Sluigi * second part of netmap_rxsync_from_host() 2018262152Sluigi */ 2019262152Sluigi kring->nr_hwcur = kring->nr_hwtail; 2020262152Sluigi nm_rxsync_finalize(kring); 2021262152Sluigi } 2022262152Sluigi 2023262152Sluigiput_out: 2024262152Sluigi nm_kr_put(kring); 2025262152Sluigi return error; 2026262152Sluigi} 2027262152Sluigi 2028262152Sluigi 2029270252Sluigi/* nm_register callback for bwrap */ 2030262152Sluigistatic int 2031262152Sluiginetmap_bwrap_register(struct netmap_adapter *na, int onoff) 2032262152Sluigi{ 2033262152Sluigi struct netmap_bwrap_adapter *bna = 2034262152Sluigi (struct netmap_bwrap_adapter *)na; 2035262152Sluigi struct netmap_adapter *hwna = bna->hwna; 2036262152Sluigi struct netmap_vp_adapter *hostna = &bna->host; 2037262152Sluigi int error; 2038262152Sluigi 2039270252Sluigi ND("%s %s", na->name, onoff ? "on" : "off"); 2040262152Sluigi 2041262152Sluigi if (onoff) { 2042262152Sluigi int i; 2043262152Sluigi 2044270252Sluigi /* netmap_do_regif has been called on the bwrap na. 
2045270252Sluigi * We need to pass the information about the 2046270252Sluigi * memory allocator down to the hwna before 2047270252Sluigi * putting it in netmap mode 2048270252Sluigi */ 2049262152Sluigi hwna->na_lut = na->na_lut; 2050262152Sluigi hwna->na_lut_objtotal = na->na_lut_objtotal; 2051270252Sluigi hwna->na_lut_objsize = na->na_lut_objsize; 2052262152Sluigi 2053262152Sluigi if (hostna->na_bdg) { 2054270252Sluigi /* if the host rings have been attached to switch, 2055270252Sluigi * we need to copy the memory allocator information 2056270252Sluigi * in the hostna also 2057270252Sluigi */ 2058262152Sluigi hostna->up.na_lut = na->na_lut; 2059262152Sluigi hostna->up.na_lut_objtotal = na->na_lut_objtotal; 2060270252Sluigi hostna->up.na_lut_objsize = na->na_lut_objsize; 2061262152Sluigi } 2062262152Sluigi 2063262152Sluigi /* cross-link the netmap rings 2064262152Sluigi * The original number of rings comes from hwna, 2065262152Sluigi * rx rings on one side equals tx rings on the other. 
2066270252Sluigi * We need to do this now, after the initialization 2067270252Sluigi * of the kring->ring pointers 2068262152Sluigi */ 2069262152Sluigi for (i = 0; i < na->num_rx_rings + 1; i++) { 2070262152Sluigi hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots; 2071262152Sluigi hwna->tx_rings[i].ring = na->rx_rings[i].ring; 2072262152Sluigi } 2073262152Sluigi for (i = 0; i < na->num_tx_rings + 1; i++) { 2074262152Sluigi hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots; 2075262152Sluigi hwna->rx_rings[i].ring = na->tx_rings[i].ring; 2076262152Sluigi } 2077262152Sluigi } 2078262152Sluigi 2079270252Sluigi /* forward the request to the hwna */ 2080270252Sluigi error = hwna->nm_register(hwna, onoff); 2081270252Sluigi if (error) 2082270252Sluigi return error; 2083262152Sluigi 2084270252Sluigi /* impersonate a netmap_vp_adapter */ 2085270252Sluigi netmap_vp_reg(na, onoff); 2086270252Sluigi if (hostna->na_bdg) 2087270252Sluigi netmap_vp_reg(&hostna->up, onoff); 2088262152Sluigi 2089262152Sluigi if (onoff) { 2090270252Sluigi /* intercept the hwna nm_nofify callback */ 2091262152Sluigi bna->save_notify = hwna->nm_notify; 2092262152Sluigi hwna->nm_notify = netmap_bwrap_intr_notify; 2093262152Sluigi } else { 2094262152Sluigi hwna->nm_notify = bna->save_notify; 2095262152Sluigi hwna->na_lut = NULL; 2096262152Sluigi hwna->na_lut_objtotal = 0; 2097270252Sluigi hwna->na_lut_objsize = 0; 2098262152Sluigi } 2099262152Sluigi 2100262152Sluigi return 0; 2101262152Sluigi} 2102262152Sluigi 2103270252Sluigi/* nm_config callback for bwrap */ 2104262152Sluigistatic int 2105262152Sluiginetmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, 2106262152Sluigi u_int *rxr, u_int *rxd) 2107262152Sluigi{ 2108262152Sluigi struct netmap_bwrap_adapter *bna = 2109262152Sluigi (struct netmap_bwrap_adapter *)na; 2110262152Sluigi struct netmap_adapter *hwna = bna->hwna; 2111262152Sluigi 2112262152Sluigi /* forward the request */ 2113262152Sluigi 
netmap_update_config(hwna); 2114262152Sluigi /* swap the results */ 2115262152Sluigi *txr = hwna->num_rx_rings; 2116262152Sluigi *txd = hwna->num_rx_desc; 2117262152Sluigi *rxr = hwna->num_tx_rings; 2118262152Sluigi *rxd = hwna->num_rx_desc; 2119262152Sluigi 2120262152Sluigi return 0; 2121262152Sluigi} 2122262152Sluigi 2123262152Sluigi 2124270252Sluigi/* nm_krings_create callback for bwrap */ 2125262152Sluigistatic int 2126262152Sluiginetmap_bwrap_krings_create(struct netmap_adapter *na) 2127262152Sluigi{ 2128262152Sluigi struct netmap_bwrap_adapter *bna = 2129262152Sluigi (struct netmap_bwrap_adapter *)na; 2130262152Sluigi struct netmap_adapter *hwna = bna->hwna; 2131262152Sluigi struct netmap_adapter *hostna = &bna->host.up; 2132262152Sluigi int error; 2133262152Sluigi 2134270252Sluigi ND("%s", na->name); 2135262152Sluigi 2136270252Sluigi /* impersonate a netmap_vp_adapter */ 2137262152Sluigi error = netmap_vp_krings_create(na); 2138262152Sluigi if (error) 2139262152Sluigi return error; 2140262152Sluigi 2141270252Sluigi /* also create the hwna krings */ 2142262152Sluigi error = hwna->nm_krings_create(hwna); 2143262152Sluigi if (error) { 2144262152Sluigi netmap_vp_krings_delete(na); 2145262152Sluigi return error; 2146262152Sluigi } 2147270252Sluigi /* the connection between the bwrap krings and the hwna krings 2148270252Sluigi * will be perfomed later, in the nm_register callback, since 2149270252Sluigi * now the kring->ring pointers have not been initialized yet 2150270252Sluigi */ 2151262152Sluigi 2152262152Sluigi if (na->na_flags & NAF_HOST_RINGS) { 2153270252Sluigi /* the hostna rings are the host rings of the bwrap. 
2154270252Sluigi * The corresponding krings must point back to the 2155270252Sluigi * hostna 2156270252Sluigi */ 2157262152Sluigi hostna->tx_rings = na->tx_rings + na->num_tx_rings; 2158270252Sluigi hostna->tx_rings[0].na = hostna; 2159262152Sluigi hostna->rx_rings = na->rx_rings + na->num_rx_rings; 2160270252Sluigi hostna->rx_rings[0].na = hostna; 2161262152Sluigi } 2162262152Sluigi 2163262152Sluigi return 0; 2164262152Sluigi} 2165262152Sluigi 2166262152Sluigi 2167262152Sluigistatic void 2168262152Sluiginetmap_bwrap_krings_delete(struct netmap_adapter *na) 2169262152Sluigi{ 2170262152Sluigi struct netmap_bwrap_adapter *bna = 2171262152Sluigi (struct netmap_bwrap_adapter *)na; 2172262152Sluigi struct netmap_adapter *hwna = bna->hwna; 2173262152Sluigi 2174270252Sluigi ND("%s", na->name); 2175262152Sluigi 2176262152Sluigi hwna->nm_krings_delete(hwna); 2177262152Sluigi netmap_vp_krings_delete(na); 2178262152Sluigi} 2179262152Sluigi 2180262152Sluigi 2181262152Sluigi/* notify method for the bridge-->hwna direction */ 2182262152Sluigistatic int 2183262152Sluiginetmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags) 2184262152Sluigi{ 2185262152Sluigi struct netmap_bwrap_adapter *bna = 2186262152Sluigi (struct netmap_bwrap_adapter *)na; 2187262152Sluigi struct netmap_adapter *hwna = bna->hwna; 2188262152Sluigi struct netmap_kring *kring, *hw_kring; 2189262152Sluigi struct netmap_ring *ring; 2190262152Sluigi u_int lim; 2191262152Sluigi int error = 0; 2192262152Sluigi 2193262152Sluigi if (tx == NR_TX) 2194262152Sluigi return EINVAL; 2195262152Sluigi 2196262152Sluigi kring = &na->rx_rings[ring_n]; 2197262152Sluigi hw_kring = &hwna->tx_rings[ring_n]; 2198262152Sluigi ring = kring->ring; 2199262152Sluigi lim = kring->nkr_num_slots - 1; 2200262152Sluigi 2201270252Sluigi if (!nm_netmap_on(hwna)) 2202262152Sluigi return 0; 2203262152Sluigi mtx_lock(&kring->q_lock); 2204262152Sluigi /* first step: simulate a user wakeup on the rx ring */ 
2205270252Sluigi netmap_vp_rxsync_locked(kring, flags); 2206262152Sluigi ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 2207270252Sluigi na->name, ring_n, 2208262152Sluigi kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 2209262152Sluigi ring->head, ring->cur, ring->tail, 2210262152Sluigi hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail); 2211262152Sluigi /* second step: the simulated user consumes all new packets */ 2212262152Sluigi ring->head = ring->cur = ring->tail; 2213262152Sluigi 2214262152Sluigi /* third step: the new packets are sent on the tx ring 2215262152Sluigi * (which is actually the same ring) 2216262152Sluigi */ 2217262152Sluigi /* set tail to what the hw expects */ 2218262152Sluigi ring->tail = hw_kring->rtail; 2219262152Sluigi nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ? 2220262152Sluigi error = hw_kring->nm_sync(hw_kring, flags); 2221262152Sluigi 2222262152Sluigi /* fourth step: now we are back the rx ring */ 2223262152Sluigi /* claim ownership on all hw owned bufs */ 2224262152Sluigi ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */ 2225262152Sluigi ring->tail = kring->rtail; /* restore saved value of tail, for safety */ 2226262152Sluigi 2227262152Sluigi /* fifth step: the user goes to sleep again, causing another rxsync */ 2228270252Sluigi netmap_vp_rxsync_locked(kring, flags); 2229262152Sluigi ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 2230270252Sluigi na->name, ring_n, 2231262152Sluigi kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 2232262152Sluigi ring->head, ring->cur, ring->tail, 2233262152Sluigi hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); 2234262152Sluigi mtx_unlock(&kring->q_lock); 2235262152Sluigi return error; 2236262152Sluigi} 2237262152Sluigi 2238262152Sluigi 2239270252Sluigi/* notify method for the bridge-->host-rings path */ 2240262152Sluigistatic int 
2241262152Sluiginetmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags) 2242262152Sluigi{ 2243262152Sluigi struct netmap_bwrap_adapter *bna = na->na_private; 2244262152Sluigi struct netmap_adapter *port_na = &bna->up.up; 2245262152Sluigi if (tx == NR_TX || ring_n != 0) 2246262152Sluigi return EINVAL; 2247262152Sluigi return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags); 2248262152Sluigi} 2249262152Sluigi 2250262152Sluigi 2251270252Sluigi/* nm_bdg_ctl callback for the bwrap. 2252270252Sluigi * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd]. 2253270252Sluigi * On attach, it needs to provide a fake netmap_priv_d structure and 2254270252Sluigi * perform a netmap_do_regif() on the bwrap. This will put both the 2255270252Sluigi * bwrap and the hwna in netmap mode, with the netmap rings shared 2256270252Sluigi * and cross linked. Moroever, it will start intercepting interrupts 2257270252Sluigi * directed to hwna. 2258270252Sluigi */ 2259262152Sluigistatic int 2260270252Sluiginetmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) 2261262152Sluigi{ 2262270252Sluigi struct netmap_priv_d *npriv; 2263270252Sluigi struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; 2264270252Sluigi struct netmap_if *nifp; 2265270252Sluigi int error = 0; 2266270252Sluigi 2267270252Sluigi if (attach) { 2268270252Sluigi if (NETMAP_OWNED_BY_ANY(na)) { 2269270252Sluigi return EBUSY; 2270270252Sluigi } 2271270252Sluigi if (bna->na_kpriv) { 2272270252Sluigi /* nothing to do */ 2273270252Sluigi return 0; 2274270252Sluigi } 2275270252Sluigi npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO); 2276270252Sluigi if (npriv == NULL) 2277270252Sluigi return ENOMEM; 2278270252Sluigi nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error); 2279270252Sluigi if (!nifp) { 2280270252Sluigi bzero(npriv, sizeof(*npriv)); 2281270252Sluigi free(npriv, M_DEVBUF); 2282270252Sluigi 
return error; 2283270252Sluigi } 2284270252Sluigi bna->na_kpriv = npriv; 2285270252Sluigi na->na_flags |= NAF_BUSY; 2286270252Sluigi } else { 2287270252Sluigi int last_instance; 2288270252Sluigi 2289270252Sluigi if (na->active_fds == 0) /* not registered */ 2290270252Sluigi return EINVAL; 2291270252Sluigi last_instance = netmap_dtor_locked(bna->na_kpriv); 2292270252Sluigi if (!last_instance) { 2293270252Sluigi D("--- error, trying to detach an entry with active mmaps"); 2294270252Sluigi error = EINVAL; 2295270252Sluigi } else { 2296270252Sluigi struct nm_bridge *b = bna->up.na_bdg, 2297270252Sluigi *bh = bna->host.na_bdg; 2298270252Sluigi npriv = bna->na_kpriv; 2299270252Sluigi bna->na_kpriv = NULL; 2300270252Sluigi D("deleting priv"); 2301270252Sluigi 2302270252Sluigi bzero(npriv, sizeof(*npriv)); 2303270252Sluigi free(npriv, M_DEVBUF); 2304270252Sluigi if (b) { 2305270252Sluigi /* XXX the bwrap dtor should take care 2306270252Sluigi * of this (2014-06-16) 2307270252Sluigi */ 2308270252Sluigi netmap_bdg_detach_common(b, bna->up.bdg_port, 2309270252Sluigi (bh ? 
bna->host.bdg_port : -1)); 2310270252Sluigi } 2311270252Sluigi na->na_flags &= ~NAF_BUSY; 2312270252Sluigi } 2313270252Sluigi } 2314270252Sluigi return error; 2315270252Sluigi 2316270252Sluigi} 2317270252Sluigi 2318270252Sluigi/* attach a bridge wrapper to the 'real' device */ 2319270252Sluigiint 2320270252Sluiginetmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) 2321270252Sluigi{ 2322262152Sluigi struct netmap_bwrap_adapter *bna; 2323270252Sluigi struct netmap_adapter *na = NULL; 2324270252Sluigi struct netmap_adapter *hostna = NULL; 2325270252Sluigi int error = 0; 2326262152Sluigi 2327270252Sluigi /* make sure the NIC is not already in use */ 2328270252Sluigi if (NETMAP_OWNED_BY_ANY(hwna)) { 2329270252Sluigi D("NIC %s busy, cannot attach to bridge", hwna->name); 2330270252Sluigi return EBUSY; 2331270252Sluigi } 2332262152Sluigi 2333262152Sluigi bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO); 2334270252Sluigi if (bna == NULL) { 2335262152Sluigi return ENOMEM; 2336270252Sluigi } 2337262152Sluigi 2338262152Sluigi na = &bna->up.up; 2339270252Sluigi strncpy(na->name, nr_name, sizeof(na->name)); 2340262152Sluigi /* fill the ring data for the bwrap adapter with rx/tx meanings 2341262152Sluigi * swapped. The real cross-linking will be done during register, 2342262152Sluigi * when all the krings will have been created. 
2343262152Sluigi */ 2344262152Sluigi na->num_rx_rings = hwna->num_tx_rings; 2345262152Sluigi na->num_tx_rings = hwna->num_rx_rings; 2346262152Sluigi na->num_tx_desc = hwna->num_rx_desc; 2347262152Sluigi na->num_rx_desc = hwna->num_tx_desc; 2348262152Sluigi na->nm_dtor = netmap_bwrap_dtor; 2349262152Sluigi na->nm_register = netmap_bwrap_register; 2350262152Sluigi // na->nm_txsync = netmap_bwrap_txsync; 2351262152Sluigi // na->nm_rxsync = netmap_bwrap_rxsync; 2352262152Sluigi na->nm_config = netmap_bwrap_config; 2353262152Sluigi na->nm_krings_create = netmap_bwrap_krings_create; 2354262152Sluigi na->nm_krings_delete = netmap_bwrap_krings_delete; 2355262152Sluigi na->nm_notify = netmap_bwrap_notify; 2356270252Sluigi na->nm_bdg_ctl = netmap_bwrap_bdg_ctl; 2357270252Sluigi na->pdev = hwna->pdev; 2358270252Sluigi na->nm_mem = netmap_mem_private_new(na->name, 2359270252Sluigi na->num_tx_rings, na->num_tx_desc, 2360270252Sluigi na->num_rx_rings, na->num_rx_desc, 2361270252Sluigi 0, 0, &error); 2362270252Sluigi na->na_flags |= NAF_MEM_OWNER; 2363270252Sluigi if (na->nm_mem == NULL) 2364270252Sluigi goto err_put; 2365262152Sluigi bna->up.retry = 1; /* XXX maybe this should depend on the hwna */ 2366262152Sluigi 2367262152Sluigi bna->hwna = hwna; 2368262152Sluigi netmap_adapter_get(hwna); 2369262152Sluigi hwna->na_private = bna; /* weak reference */ 2370270252Sluigi hwna->na_vp = &bna->up; 2371270252Sluigi 2372262152Sluigi if (hwna->na_flags & NAF_HOST_RINGS) { 2373270252Sluigi if (hwna->na_flags & NAF_SW_ONLY) 2374270252Sluigi na->na_flags |= NAF_SW_ONLY; 2375262152Sluigi na->na_flags |= NAF_HOST_RINGS; 2376262152Sluigi hostna = &bna->host.up; 2377270252Sluigi snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name); 2378262152Sluigi hostna->ifp = hwna->ifp; 2379262152Sluigi hostna->num_tx_rings = 1; 2380262152Sluigi hostna->num_tx_desc = hwna->num_rx_desc; 2381262152Sluigi hostna->num_rx_rings = 1; 2382262152Sluigi hostna->num_rx_desc = hwna->num_tx_desc; 
2383262152Sluigi // hostna->nm_txsync = netmap_bwrap_host_txsync; 2384262152Sluigi // hostna->nm_rxsync = netmap_bwrap_host_rxsync; 2385262152Sluigi hostna->nm_notify = netmap_bwrap_host_notify; 2386262152Sluigi hostna->nm_mem = na->nm_mem; 2387262152Sluigi hostna->na_private = bna; 2388270252Sluigi hostna->na_vp = &bna->up; 2389270252Sluigi na->na_hostvp = hwna->na_hostvp = 2390270252Sluigi hostna->na_hostvp = &bna->host; 2391270252Sluigi hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */ 2392262152Sluigi } 2393262152Sluigi 2394262152Sluigi ND("%s<->%s txr %d txd %d rxr %d rxd %d", 2395270252Sluigi na->name, ifp->if_xname, 2396262152Sluigi na->num_tx_rings, na->num_tx_desc, 2397262152Sluigi na->num_rx_rings, na->num_rx_desc); 2398262152Sluigi 2399262152Sluigi error = netmap_attach_common(na); 2400262152Sluigi if (error) { 2401270252Sluigi goto err_free; 2402262152Sluigi } 2403270252Sluigi /* make bwrap ifp point to the real ifp 2404270252Sluigi * NOTE: netmap_attach_common() interprets a non-NULL na->ifp 2405270252Sluigi * as a request to make the ifp point to the na. 
Since we 2406270252Sluigi * do not want to change the na already pointed to by hwna->ifp, 2407270252Sluigi * the following assignment has to be delayed until now 2408270252Sluigi */ 2409270252Sluigi na->ifp = hwna->ifp; 2410270252Sluigi hwna->na_flags |= NAF_BUSY; 2411270252Sluigi /* make hwna point to the allocator we are actually using, 2412270252Sluigi * so that monitors will be able to find it 2413270252Sluigi */ 2414270252Sluigi bna->save_nmd = hwna->nm_mem; 2415270252Sluigi hwna->nm_mem = na->nm_mem; 2416262152Sluigi return 0; 2417270252Sluigi 2418270252Sluigierr_free: 2419270252Sluigi netmap_mem_private_delete(na->nm_mem); 2420270252Sluigierr_put: 2421270252Sluigi hwna->na_vp = hwna->na_hostvp = NULL; 2422270252Sluigi netmap_adapter_put(hwna); 2423270252Sluigi free(bna, M_DEVBUF); 2424270252Sluigi return error; 2425270252Sluigi 2426262152Sluigi} 2427262152Sluigi 2428262152Sluigi 2429262152Sluigivoid 2430262152Sluiginetmap_init_bridges(void) 2431262152Sluigi{ 2432262152Sluigi int i; 2433262152Sluigi bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */ 2434262152Sluigi for (i = 0; i < NM_BRIDGES; i++) 2435262152Sluigi BDG_RWINIT(&nm_bridges[i]); 2436262152Sluigi} 2437262152Sluigi#endif /* WITH_VALE */ 2438