1262153Sluigi/* 2262153Sluigi * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. 3262153Sluigi * 4262153Sluigi * Redistribution and use in source and binary forms, with or without 5262153Sluigi * modification, are permitted provided that the following conditions 6262153Sluigi * are met: 7262153Sluigi * 1. Redistributions of source code must retain the above copyright 8262153Sluigi * notice, this list of conditions and the following disclaimer. 9262153Sluigi * 2. Redistributions in binary form must reproduce the above copyright 10262153Sluigi * notice, this list of conditions and the following disclaimer in the 11262153Sluigi * documentation and/or other materials provided with the distribution. 12262153Sluigi * 13262153Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14262153Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15262153Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16262153Sluigi * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17262153Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18262153Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19262153Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20262153Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21262153Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22262153Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23262153Sluigi * SUCH DAMAGE. 24262153Sluigi */ 25262153Sluigi 26262153Sluigi 27262153Sluigi/* 28262153Sluigi * This module implements the VALE switch for netmap 29262153Sluigi 30262153Sluigi--- VALE SWITCH --- 31262153Sluigi 32262153SluigiNMG_LOCK() serializes all modifications to switches and ports. 
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring or deleting a new port, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch)

 */

/*
 * OS-specific code that is used only within this file.
54262153Sluigi * Other OS-specific code that must be accessed by drivers 55262153Sluigi * is present in netmap_kern.h 56262153Sluigi */ 57262153Sluigi 58262153Sluigi#if defined(__FreeBSD__) 59262153Sluigi#include <sys/cdefs.h> /* prerequisite */ 60262153Sluigi__FBSDID("$FreeBSD$"); 61262153Sluigi 62262153Sluigi#include <sys/types.h> 63262153Sluigi#include <sys/errno.h> 64262153Sluigi#include <sys/param.h> /* defines used in kernel.h */ 65262153Sluigi#include <sys/kernel.h> /* types used in module initialization */ 66262153Sluigi#include <sys/conf.h> /* cdevsw struct, UID, GID */ 67262153Sluigi#include <sys/sockio.h> 68262153Sluigi#include <sys/socketvar.h> /* struct socket */ 69262153Sluigi#include <sys/malloc.h> 70262153Sluigi#include <sys/poll.h> 71262153Sluigi#include <sys/rwlock.h> 72262153Sluigi#include <sys/socket.h> /* sockaddrs */ 73262153Sluigi#include <sys/selinfo.h> 74262153Sluigi#include <sys/sysctl.h> 75262153Sluigi#include <net/if.h> 76262153Sluigi#include <net/if_var.h> 77262153Sluigi#include <net/bpf.h> /* BIOCIMMEDIATE */ 78262153Sluigi#include <machine/bus.h> /* bus_dmamap_* */ 79262153Sluigi#include <sys/endian.h> 80262153Sluigi#include <sys/refcount.h> 81262153Sluigi 82262153Sluigi 83262153Sluigi#define BDG_RWLOCK_T struct rwlock // struct rwlock 84262153Sluigi 85262153Sluigi#define BDG_RWINIT(b) \ 86262153Sluigi rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS) 87262153Sluigi#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock) 88262153Sluigi#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock) 89262153Sluigi#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock) 90262153Sluigi#define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock) 91262153Sluigi#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock) 92262153Sluigi#define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock) 93262153Sluigi 94262153Sluigi 95262153Sluigi#elif defined(linux) 96262153Sluigi 97262153Sluigi#include "bsd_glue.h" 98262153Sluigi 99262153Sluigi#elif defined(__APPLE__) 100262153Sluigi 
101262153Sluigi#warning OSX support is only partial 102262153Sluigi#include "osx_glue.h" 103262153Sluigi 104262153Sluigi#else 105262153Sluigi 106262153Sluigi#error Unsupported platform 107262153Sluigi 108262153Sluigi#endif /* unsupported */ 109262153Sluigi 110262153Sluigi/* 111262153Sluigi * common headers 112262153Sluigi */ 113262153Sluigi 114262153Sluigi#include <net/netmap.h> 115262153Sluigi#include <dev/netmap/netmap_kern.h> 116262153Sluigi#include <dev/netmap/netmap_mem2.h> 117262153Sluigi 118262153Sluigi#ifdef WITH_VALE 119262153Sluigi 120262153Sluigi/* 121262153Sluigi * system parameters (most of them in netmap_kern.h) 122262153Sluigi * NM_NAME prefix for switch port names, default "vale" 123262153Sluigi * NM_BDG_MAXPORTS number of ports 124262153Sluigi * NM_BRIDGES max number of switches in the system. 125262153Sluigi * XXX should become a sysctl or tunable 126262153Sluigi * 127262153Sluigi * Switch ports are named valeX:Y where X is the switch name and Y 128262153Sluigi * is the port. If Y matches a physical interface name, the port is 129262153Sluigi * connected to a physical device. 130262153Sluigi * 131262153Sluigi * Unlike physical interfaces, switch ports use their own memory region 132262153Sluigi * for rings and buffers. 133262153Sluigi * The virtual interfaces use per-queue lock instead of core lock. 134262153Sluigi * In the tx loop, we aggregate traffic in batches to make all operations 135262153Sluigi * faster. The batch size is bridge_batch. 136262153Sluigi */ 137262153Sluigi#define NM_BDG_MAXRINGS 16 /* XXX unclear how many. 
*/
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
#define NM_BDG_HASH		1024	/* forwarding table entries */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */
/* actual size of the tables */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
#define NM_BRIDGES		8	/* number of bridges */


/*
 * bridge_batch is set via sysctl to the max batch size to be
 * used in the bridge. The actual value may be larger as the
 * last packet in the block may overflow the size.
 */
int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");


/* Forward declarations. */
static int bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp);
static int bdg_netmap_reg(struct netmap_adapter *na, int onoff);
static int netmap_bwrap_attach(struct ifnet *, struct ifnet *);
static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
int kern_netmap_regif(struct nmreq *nmr);

/*
 * For each output interface, nm_bdg_q is used to construct a list.
 * bq_len is the number of output buffers (we can have coalescing
 * during the copy).
 *
 * nm_alloc_bdgfwd() initializes bq_head and bq_tail to NM_FT_NULL
 * and bq_len to 0, i.e. an empty list.
 */
struct nm_bdg_q {
	uint16_t bq_head;
	uint16_t bq_tail;
	uint32_t bq_len;	/* number of buffers */
};

/* XXX revise this */
/* One entry of the per-bridge forwarding table (see nm_bridge.ht). */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;
};

/*
 * nm_bridge is a descriptor for a VALE switch.
 * Interfaces for a bridge are all in bdg_ports[].
 * The array has fixed size, an empty entry does not terminate
 * the search, but lookups only occur on attach/detach so we
 * don't mind if they are slow.
 *
 * The bridge is non blocking on the transmit ports: excess
 * packets are dropped if there is no room on the output port.
 *
 * bdg_lock protects accesses to the bdg_ports array.
 * This is a rw lock (or equivalent).
 */
struct nm_bridge {
	/* XXX what is the proper alignment/layout ? */
	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
	/* number of characters of the bridge name kept in bdg_basename */
	int		bdg_namelen;
	uint32_t	bdg_active_ports; /* 0 means free */
	/* bridge name prefix, filled in by nm_find_bridge() */
	char		bdg_basename[IFNAMSIZ];

	/* Indexes of active ports (up to active_ports)
	 * and all other remaining ports.
	 */
	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];

	/* port adapters, indexed by the values stored in bdg_port_index[] */
	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];


	/*
	 * The function to decide the destination port.
	 * It returns either of an index of the destination port,
	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
	 * forward this packet.  ring_nr is the source ring index, and the
	 * function may overwrite this value to forward this packet to a
	 * different ring index.
	 * This function must be set by netmap_bdgctl().
	 */
	bdg_lookup_fn_t nm_bdg_lookup;

	/* the forwarding table, MAC+ports.
	 * XXX should be changed to an argument to be passed to
	 * the lookup function, and allocated on attach
	 */
	struct nm_hash_ent ht[NM_BDG_HASH];
};


/*
 * XXX in principle nm_bridges could be created dynamically
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 */
struct nm_bridge nm_bridges[NM_BRIDGES];


/*
 * this is a slightly optimized copy routine which rounds
 * to multiple of 64 bytes and is often faster than dealing
 * with other odd sizes. We assume there is enough room
 * in the source and destination buffers.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
245262153Sluigi */ 246262153Sluigistatic inline void 247262153Sluigipkt_copy(void *_src, void *_dst, int l) 248262153Sluigi{ 249262153Sluigi uint64_t *src = _src; 250262153Sluigi uint64_t *dst = _dst; 251262153Sluigi if (unlikely(l >= 1024)) { 252262153Sluigi memcpy(dst, src, l); 253262153Sluigi return; 254262153Sluigi } 255262153Sluigi for (; likely(l > 0); l-=64) { 256262153Sluigi *dst++ = *src++; 257262153Sluigi *dst++ = *src++; 258262153Sluigi *dst++ = *src++; 259262153Sluigi *dst++ = *src++; 260262153Sluigi *dst++ = *src++; 261262153Sluigi *dst++ = *src++; 262262153Sluigi *dst++ = *src++; 263262153Sluigi *dst++ = *src++; 264262153Sluigi } 265262153Sluigi} 266262153Sluigi 267262153Sluigi 268262153Sluigi/* 269262153Sluigi * locate a bridge among the existing ones. 270262153Sluigi * MUST BE CALLED WITH NMG_LOCK() 271262153Sluigi * 272262153Sluigi * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME. 273262153Sluigi * We assume that this is called with a name of at least NM_NAME chars. 274262153Sluigi */ 275262153Sluigistatic struct nm_bridge * 276262153Sluiginm_find_bridge(const char *name, int create) 277262153Sluigi{ 278262153Sluigi int i, l, namelen; 279262153Sluigi struct nm_bridge *b = NULL; 280262153Sluigi 281262153Sluigi NMG_LOCK_ASSERT(); 282262153Sluigi 283262153Sluigi namelen = strlen(NM_NAME); /* base length */ 284262153Sluigi l = name ? strlen(name) : 0; /* actual length */ 285262153Sluigi if (l < namelen) { 286262153Sluigi D("invalid bridge name %s", name ? 
name : NULL); 287262153Sluigi return NULL; 288262153Sluigi } 289262153Sluigi for (i = namelen + 1; i < l; i++) { 290262153Sluigi if (name[i] == ':') { 291262153Sluigi namelen = i; 292262153Sluigi break; 293262153Sluigi } 294262153Sluigi } 295262153Sluigi if (namelen >= IFNAMSIZ) 296262153Sluigi namelen = IFNAMSIZ; 297262153Sluigi ND("--- prefix is '%.*s' ---", namelen, name); 298262153Sluigi 299262153Sluigi /* lookup the name, remember empty slot if there is one */ 300262153Sluigi for (i = 0; i < NM_BRIDGES; i++) { 301262153Sluigi struct nm_bridge *x = nm_bridges + i; 302262153Sluigi 303262153Sluigi if (x->bdg_active_ports == 0) { 304262153Sluigi if (create && b == NULL) 305262153Sluigi b = x; /* record empty slot */ 306262153Sluigi } else if (x->bdg_namelen != namelen) { 307262153Sluigi continue; 308262153Sluigi } else if (strncmp(name, x->bdg_basename, namelen) == 0) { 309262153Sluigi ND("found '%.*s' at %d", namelen, name, i); 310262153Sluigi b = x; 311262153Sluigi break; 312262153Sluigi } 313262153Sluigi } 314262153Sluigi if (i == NM_BRIDGES && b) { /* name not found, can create entry */ 315262153Sluigi /* initialize the bridge */ 316262153Sluigi strncpy(b->bdg_basename, name, namelen); 317262153Sluigi ND("create new bridge %s with ports %d", b->bdg_basename, 318262153Sluigi b->bdg_active_ports); 319262153Sluigi b->bdg_namelen = namelen; 320262153Sluigi b->bdg_active_ports = 0; 321262153Sluigi for (i = 0; i < NM_BDG_MAXPORTS; i++) 322262153Sluigi b->bdg_port_index[i] = i; 323262153Sluigi /* set the default function */ 324262153Sluigi b->nm_bdg_lookup = netmap_bdg_learning; 325262153Sluigi /* reset the MAC address table */ 326262153Sluigi bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); 327262153Sluigi } 328262153Sluigi return b; 329262153Sluigi} 330262153Sluigi 331262153Sluigi 332262153Sluigi/* 333262153Sluigi * Free the forwarding tables for rings attached to switch ports. 
334262153Sluigi */ 335262153Sluigistatic void 336262153Sluiginm_free_bdgfwd(struct netmap_adapter *na) 337262153Sluigi{ 338262153Sluigi int nrings, i; 339262153Sluigi struct netmap_kring *kring; 340262153Sluigi 341262153Sluigi NMG_LOCK_ASSERT(); 342262153Sluigi nrings = na->num_tx_rings; 343262153Sluigi kring = na->tx_rings; 344262153Sluigi for (i = 0; i < nrings; i++) { 345262153Sluigi if (kring[i].nkr_ft) { 346262153Sluigi free(kring[i].nkr_ft, M_DEVBUF); 347262153Sluigi kring[i].nkr_ft = NULL; /* protect from freeing twice */ 348262153Sluigi } 349262153Sluigi } 350262153Sluigi} 351262153Sluigi 352262153Sluigi 353262153Sluigi/* 354262153Sluigi * Allocate the forwarding tables for the rings attached to the bridge ports. 355262153Sluigi */ 356262153Sluigistatic int 357262153Sluiginm_alloc_bdgfwd(struct netmap_adapter *na) 358262153Sluigi{ 359262153Sluigi int nrings, l, i, num_dstq; 360262153Sluigi struct netmap_kring *kring; 361262153Sluigi 362262153Sluigi NMG_LOCK_ASSERT(); 363262153Sluigi /* all port:rings + broadcast */ 364262153Sluigi num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1; 365262153Sluigi l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX; 366262153Sluigi l += sizeof(struct nm_bdg_q) * num_dstq; 367262153Sluigi l += sizeof(uint16_t) * NM_BDG_BATCH_MAX; 368262153Sluigi 369262153Sluigi nrings = netmap_real_tx_rings(na); 370262153Sluigi kring = na->tx_rings; 371262153Sluigi for (i = 0; i < nrings; i++) { 372262153Sluigi struct nm_bdg_fwd *ft; 373262153Sluigi struct nm_bdg_q *dstq; 374262153Sluigi int j; 375262153Sluigi 376262153Sluigi ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO); 377262153Sluigi if (!ft) { 378262153Sluigi nm_free_bdgfwd(na); 379262153Sluigi return ENOMEM; 380262153Sluigi } 381262153Sluigi dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); 382262153Sluigi for (j = 0; j < num_dstq; j++) { 383262153Sluigi dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL; 384262153Sluigi dstq[j].bq_len = 0; 385262153Sluigi } 386262153Sluigi kring[i].nkr_ft = ft; 
387262153Sluigi } 388262153Sluigi return 0; 389262153Sluigi} 390262153Sluigi 391262153Sluigi 392262153Sluigistatic void 393262153Sluiginetmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) 394262153Sluigi{ 395262153Sluigi int s_hw = hw, s_sw = sw; 396262153Sluigi int i, lim =b->bdg_active_ports; 397262153Sluigi uint8_t tmp[NM_BDG_MAXPORTS]; 398262153Sluigi 399262153Sluigi /* 400262153Sluigi New algorithm: 401262153Sluigi make a copy of bdg_port_index; 402262153Sluigi lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port 403262153Sluigi in the array of bdg_port_index, replacing them with 404262153Sluigi entries from the bottom of the array; 405262153Sluigi decrement bdg_active_ports; 406262153Sluigi acquire BDG_WLOCK() and copy back the array. 407262153Sluigi */ 408262153Sluigi 409262153Sluigi if (netmap_verbose) 410262153Sluigi D("detach %d and %d (lim %d)", hw, sw, lim); 411262153Sluigi /* make a copy of the list of active ports, update it, 412262153Sluigi * and then copy back within BDG_WLOCK(). 
413262153Sluigi */ 414262153Sluigi memcpy(tmp, b->bdg_port_index, sizeof(tmp)); 415262153Sluigi for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) { 416262153Sluigi if (hw >= 0 && tmp[i] == hw) { 417262153Sluigi ND("detach hw %d at %d", hw, i); 418262153Sluigi lim--; /* point to last active port */ 419262153Sluigi tmp[i] = tmp[lim]; /* swap with i */ 420262153Sluigi tmp[lim] = hw; /* now this is inactive */ 421262153Sluigi hw = -1; 422262153Sluigi } else if (sw >= 0 && tmp[i] == sw) { 423262153Sluigi ND("detach sw %d at %d", sw, i); 424262153Sluigi lim--; 425262153Sluigi tmp[i] = tmp[lim]; 426262153Sluigi tmp[lim] = sw; 427262153Sluigi sw = -1; 428262153Sluigi } else { 429262153Sluigi i++; 430262153Sluigi } 431262153Sluigi } 432262153Sluigi if (hw >= 0 || sw >= 0) { 433262153Sluigi D("XXX delete failed hw %d sw %d, should panic...", hw, sw); 434262153Sluigi } 435262153Sluigi 436262153Sluigi BDG_WLOCK(b); 437262153Sluigi b->bdg_ports[s_hw] = NULL; 438262153Sluigi if (s_sw >= 0) { 439262153Sluigi b->bdg_ports[s_sw] = NULL; 440262153Sluigi } 441262153Sluigi memcpy(b->bdg_port_index, tmp, sizeof(tmp)); 442262153Sluigi b->bdg_active_ports = lim; 443262153Sluigi BDG_WUNLOCK(b); 444262153Sluigi 445262153Sluigi ND("now %d active ports", lim); 446262153Sluigi if (lim == 0) { 447262153Sluigi ND("marking bridge %s as free", b->bdg_basename); 448262153Sluigi b->nm_bdg_lookup = NULL; 449262153Sluigi } 450262153Sluigi} 451262153Sluigi 452262153Sluigi 453262153Sluigistatic void 454262153Sluiginetmap_adapter_vp_dtor(struct netmap_adapter *na) 455262153Sluigi{ 456262153Sluigi struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na; 457262153Sluigi struct nm_bridge *b = vpna->na_bdg; 458262153Sluigi struct ifnet *ifp = na->ifp; 459262153Sluigi 460262153Sluigi ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount); 461262153Sluigi 462262153Sluigi if (b) { 463262153Sluigi netmap_bdg_detach_common(b, vpna->bdg_port, -1); 464262153Sluigi } 465262153Sluigi 466262153Sluigi 
bzero(ifp, sizeof(*ifp)); 467262153Sluigi free(ifp, M_DEVBUF); 468262153Sluigi na->ifp = NULL; 469262153Sluigi} 470262153Sluigi 471262153Sluigi 472262153Sluigi/* Try to get a reference to a netmap adapter attached to a VALE switch. 473262153Sluigi * If the adapter is found (or is created), this function returns 0, a 474262153Sluigi * non NULL pointer is returned into *na, and the caller holds a 475262153Sluigi * reference to the adapter. 476262153Sluigi * If an adapter is not found, then no reference is grabbed and the 477262153Sluigi * function returns an error code, or 0 if there is just a VALE prefix 478262153Sluigi * mismatch. Therefore the caller holds a reference when 479262153Sluigi * (*na != NULL && return == 0). 480262153Sluigi */ 481262153Sluigiint 482262153Sluiginetmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create) 483262153Sluigi{ 484262153Sluigi const char *name = nmr->nr_name; 485262153Sluigi struct ifnet *ifp; 486262153Sluigi int error = 0; 487262153Sluigi struct netmap_adapter *ret; 488262153Sluigi struct netmap_vp_adapter *vpna; 489262153Sluigi struct nm_bridge *b; 490262153Sluigi int i, j, cand = -1, cand2 = -1; 491262153Sluigi int needed; 492262153Sluigi 493262153Sluigi *na = NULL; /* default return value */ 494262153Sluigi 495262153Sluigi /* first try to see if this is a bridge port. */ 496262153Sluigi NMG_LOCK_ASSERT(); 497262153Sluigi if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) { 498262153Sluigi return 0; /* no error, but no VALE prefix */ 499262153Sluigi } 500262153Sluigi 501262153Sluigi b = nm_find_bridge(name, create); 502262153Sluigi if (b == NULL) { 503262153Sluigi D("no bridges available for '%s'", name); 504262153Sluigi return (create ? ENOMEM : ENXIO); 505262153Sluigi } 506262153Sluigi 507262153Sluigi /* Now we are sure that name starts with the bridge's name, 508262153Sluigi * lookup the port in the bridge. We need to scan the entire 509262153Sluigi * list. 
It is not important to hold a WLOCK on the bridge 510262153Sluigi * during the search because NMG_LOCK already guarantees 511262153Sluigi * that there are no other possible writers. 512262153Sluigi */ 513262153Sluigi 514262153Sluigi /* lookup in the local list of ports */ 515262153Sluigi for (j = 0; j < b->bdg_active_ports; j++) { 516262153Sluigi i = b->bdg_port_index[j]; 517262153Sluigi vpna = b->bdg_ports[i]; 518262153Sluigi // KASSERT(na != NULL); 519262153Sluigi ifp = vpna->up.ifp; 520262153Sluigi /* XXX make sure the name only contains one : */ 521262153Sluigi if (!strcmp(NM_IFPNAME(ifp), name)) { 522262153Sluigi netmap_adapter_get(&vpna->up); 523262153Sluigi ND("found existing if %s refs %d", name, 524262153Sluigi vpna->na_bdg_refcount); 525262153Sluigi *na = (struct netmap_adapter *)vpna; 526262153Sluigi return 0; 527262153Sluigi } 528262153Sluigi } 529262153Sluigi /* not found, should we create it? */ 530262153Sluigi if (!create) 531262153Sluigi return ENXIO; 532262153Sluigi /* yes we should, see if we have space to attach entries */ 533262153Sluigi needed = 2; /* in some cases we only need 1 */ 534262153Sluigi if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) { 535262153Sluigi D("bridge full %d, cannot create new port", b->bdg_active_ports); 536262153Sluigi return ENOMEM; 537262153Sluigi } 538262153Sluigi /* record the next two ports available, but do not allocate yet */ 539262153Sluigi cand = b->bdg_port_index[b->bdg_active_ports]; 540262153Sluigi cand2 = b->bdg_port_index[b->bdg_active_ports + 1]; 541262153Sluigi ND("+++ bridge %s port %s used %d avail %d %d", 542262153Sluigi b->bdg_basename, name, b->bdg_active_ports, cand, cand2); 543262153Sluigi 544262153Sluigi /* 545262153Sluigi * try see if there is a matching NIC with this name 546262153Sluigi * (after the bridge's name) 547262153Sluigi */ 548262153Sluigi ifp = ifunit_ref(name + b->bdg_namelen + 1); 549262153Sluigi if (!ifp) { /* this is a virtual port */ 550262153Sluigi if (nmr->nr_cmd) { 
551262153Sluigi /* nr_cmd must be 0 for a virtual port */ 552262153Sluigi return EINVAL; 553262153Sluigi } 554262153Sluigi 555262153Sluigi /* create a struct ifnet for the new port. 556262153Sluigi * need M_NOWAIT as we are under nma_lock 557262153Sluigi */ 558262153Sluigi ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO); 559262153Sluigi if (!ifp) 560262153Sluigi return ENOMEM; 561262153Sluigi 562262153Sluigi strcpy(ifp->if_xname, name); 563262153Sluigi /* bdg_netmap_attach creates a struct netmap_adapter */ 564262153Sluigi error = bdg_netmap_attach(nmr, ifp); 565262153Sluigi if (error) { 566262153Sluigi D("error %d", error); 567262153Sluigi free(ifp, M_DEVBUF); 568262153Sluigi return error; 569262153Sluigi } 570262153Sluigi ret = NA(ifp); 571262153Sluigi cand2 = -1; /* only need one port */ 572262153Sluigi } else { /* this is a NIC */ 573262153Sluigi struct ifnet *fake_ifp; 574262153Sluigi 575262153Sluigi error = netmap_get_hw_na(ifp, &ret); 576262153Sluigi if (error || ret == NULL) 577262153Sluigi goto out; 578262153Sluigi 579262153Sluigi /* make sure the NIC is not already in use */ 580262153Sluigi if (NETMAP_OWNED_BY_ANY(ret)) { 581262153Sluigi D("NIC %s busy, cannot attach to bridge", 582262153Sluigi NM_IFPNAME(ifp)); 583262153Sluigi error = EBUSY; 584262153Sluigi goto out; 585262153Sluigi } 586262153Sluigi /* create a fake interface */ 587262153Sluigi fake_ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO); 588262153Sluigi if (!fake_ifp) { 589262153Sluigi error = ENOMEM; 590262153Sluigi goto out; 591262153Sluigi } 592262153Sluigi strcpy(fake_ifp->if_xname, name); 593262153Sluigi error = netmap_bwrap_attach(fake_ifp, ifp); 594262153Sluigi if (error) { 595262153Sluigi free(fake_ifp, M_DEVBUF); 596262153Sluigi goto out; 597262153Sluigi } 598262153Sluigi ret = NA(fake_ifp); 599262153Sluigi if (nmr->nr_arg1 != NETMAP_BDG_HOST) 600262153Sluigi cand2 = -1; /* only need one port */ 601262153Sluigi if_rele(ifp); 602262153Sluigi } 603262153Sluigi vpna = 
(struct netmap_vp_adapter *)ret; 604262153Sluigi 605262153Sluigi BDG_WLOCK(b); 606262153Sluigi vpna->bdg_port = cand; 607262153Sluigi ND("NIC %p to bridge port %d", vpna, cand); 608262153Sluigi /* bind the port to the bridge (virtual ports are not active) */ 609262153Sluigi b->bdg_ports[cand] = vpna; 610262153Sluigi vpna->na_bdg = b; 611262153Sluigi b->bdg_active_ports++; 612262153Sluigi if (cand2 >= 0) { 613262153Sluigi struct netmap_vp_adapter *hostna = vpna + 1; 614262153Sluigi /* also bind the host stack to the bridge */ 615262153Sluigi b->bdg_ports[cand2] = hostna; 616262153Sluigi hostna->bdg_port = cand2; 617262153Sluigi hostna->na_bdg = b; 618262153Sluigi b->bdg_active_ports++; 619262153Sluigi ND("host %p to bridge port %d", hostna, cand2); 620262153Sluigi } 621262153Sluigi ND("if %s refs %d", name, vpna->up.na_refcount); 622262153Sluigi BDG_WUNLOCK(b); 623262153Sluigi *na = ret; 624262153Sluigi netmap_adapter_get(ret); 625262153Sluigi return 0; 626262153Sluigi 627262153Sluigiout: 628262153Sluigi if_rele(ifp); 629262153Sluigi 630262153Sluigi return error; 631262153Sluigi} 632262153Sluigi 633262153Sluigi 634262153Sluigi/* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */ 635262153Sluigistatic int 636262153Sluiginm_bdg_attach(struct nmreq *nmr) 637262153Sluigi{ 638262153Sluigi struct netmap_adapter *na; 639262153Sluigi struct netmap_if *nifp; 640262153Sluigi struct netmap_priv_d *npriv; 641262153Sluigi struct netmap_bwrap_adapter *bna; 642262153Sluigi int error; 643262153Sluigi 644262153Sluigi npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO); 645262153Sluigi if (npriv == NULL) 646262153Sluigi return ENOMEM; 647262153Sluigi 648262153Sluigi NMG_LOCK(); 649262153Sluigi 650262153Sluigi error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */); 651262153Sluigi if (error) /* no device, or another bridge or user owns the device */ 652262153Sluigi goto unlock_exit; 653262153Sluigi 654262153Sluigi if (na == NULL) { /* VALE prefix missing */ 
655262153Sluigi error = EINVAL; 656262153Sluigi goto unlock_exit; 657262153Sluigi } 658262153Sluigi 659262153Sluigi if (na->active_fds > 0) { /* already registered */ 660262153Sluigi error = EBUSY; 661262153Sluigi goto unref_exit; 662262153Sluigi } 663262153Sluigi 664262153Sluigi nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error); 665262153Sluigi if (!nifp) { 666262153Sluigi goto unref_exit; 667262153Sluigi } 668262153Sluigi 669262153Sluigi bna = (struct netmap_bwrap_adapter*)na; 670262153Sluigi bna->na_kpriv = npriv; 671262153Sluigi NMG_UNLOCK(); 672262153Sluigi ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp)); 673262153Sluigi return 0; 674262153Sluigi 675262153Sluigiunref_exit: 676262153Sluigi netmap_adapter_put(na); 677262153Sluigiunlock_exit: 678262153Sluigi NMG_UNLOCK(); 679262153Sluigi bzero(npriv, sizeof(*npriv)); 680262153Sluigi free(npriv, M_DEVBUF); 681262153Sluigi return error; 682262153Sluigi} 683262153Sluigi 684262153Sluigi 685262153Sluigistatic int 686262153Sluiginm_bdg_detach(struct nmreq *nmr) 687262153Sluigi{ 688262153Sluigi struct netmap_adapter *na; 689262153Sluigi int error; 690262153Sluigi struct netmap_bwrap_adapter *bna; 691262153Sluigi int last_instance; 692262153Sluigi 693262153Sluigi NMG_LOCK(); 694262153Sluigi error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */); 695262153Sluigi if (error) { /* no device, or another bridge or user owns the device */ 696262153Sluigi goto unlock_exit; 697262153Sluigi } 698262153Sluigi 699262153Sluigi if (na == NULL) { /* VALE prefix missing */ 700262153Sluigi error = EINVAL; 701262153Sluigi goto unlock_exit; 702262153Sluigi } 703262153Sluigi 704262153Sluigi bna = (struct netmap_bwrap_adapter *)na; 705262153Sluigi 706262153Sluigi if (na->active_fds == 0) { /* not registered */ 707262153Sluigi error = EINVAL; 708262153Sluigi goto unref_exit; 709262153Sluigi } 710262153Sluigi 711262153Sluigi last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */ 712262153Sluigi if 
(!last_instance) { 713262153Sluigi D("--- error, trying to detach an entry with active mmaps"); 714262153Sluigi error = EINVAL; 715262153Sluigi } else { 716262153Sluigi struct netmap_priv_d *npriv = bna->na_kpriv; 717262153Sluigi 718262153Sluigi bna->na_kpriv = NULL; 719262153Sluigi D("deleting priv"); 720262153Sluigi 721262153Sluigi bzero(npriv, sizeof(*npriv)); 722262153Sluigi free(npriv, M_DEVBUF); 723262153Sluigi } 724262153Sluigi 725262153Sluigiunref_exit: 726262153Sluigi netmap_adapter_put(na); 727262153Sluigiunlock_exit: 728262153Sluigi NMG_UNLOCK(); 729262153Sluigi return error; 730262153Sluigi 731262153Sluigi} 732262153Sluigi 733262153Sluigi 734262153Sluigi/* exported to kernel callers, e.g. OVS ? 735262153Sluigi * Entry point. 736262153Sluigi * Called without NMG_LOCK. 737262153Sluigi */ 738262153Sluigiint 739262153Sluiginetmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func) 740262153Sluigi{ 741262153Sluigi struct nm_bridge *b; 742262153Sluigi struct netmap_adapter *na; 743262153Sluigi struct netmap_vp_adapter *vpna; 744262153Sluigi struct ifnet *iter; 745262153Sluigi char *name = nmr->nr_name; 746262153Sluigi int cmd = nmr->nr_cmd, namelen = strlen(name); 747262153Sluigi int error = 0, i, j; 748262153Sluigi 749262153Sluigi switch (cmd) { 750262153Sluigi case NETMAP_BDG_ATTACH: 751262153Sluigi error = nm_bdg_attach(nmr); 752262153Sluigi break; 753262153Sluigi 754262153Sluigi case NETMAP_BDG_DETACH: 755262153Sluigi error = nm_bdg_detach(nmr); 756262153Sluigi break; 757262153Sluigi 758262153Sluigi case NETMAP_BDG_LIST: 759262153Sluigi /* this is used to enumerate bridges and ports */ 760262153Sluigi if (namelen) { /* look up indexes of bridge and port */ 761262153Sluigi if (strncmp(name, NM_NAME, strlen(NM_NAME))) { 762262153Sluigi error = EINVAL; 763262153Sluigi break; 764262153Sluigi } 765262153Sluigi NMG_LOCK(); 766262153Sluigi b = nm_find_bridge(name, 0 /* don't create */); 767262153Sluigi if (!b) { 768262153Sluigi error = ENOENT; 769262153Sluigi 
NMG_UNLOCK(); 770262153Sluigi break; 771262153Sluigi } 772262153Sluigi 773262153Sluigi error = ENOENT; 774262153Sluigi for (j = 0; j < b->bdg_active_ports; j++) { 775262153Sluigi i = b->bdg_port_index[j]; 776262153Sluigi vpna = b->bdg_ports[i]; 777262153Sluigi if (vpna == NULL) { 778262153Sluigi D("---AAAAAAAAARGH-------"); 779262153Sluigi continue; 780262153Sluigi } 781262153Sluigi iter = vpna->up.ifp; 782262153Sluigi /* the former and the latter identify a 783262153Sluigi * virtual port and a NIC, respectively 784262153Sluigi */ 785262153Sluigi if (!strcmp(iter->if_xname, name)) { 786262153Sluigi /* bridge index */ 787262153Sluigi nmr->nr_arg1 = b - nm_bridges; 788262153Sluigi nmr->nr_arg2 = i; /* port index */ 789262153Sluigi error = 0; 790262153Sluigi break; 791262153Sluigi } 792262153Sluigi } 793262153Sluigi NMG_UNLOCK(); 794262153Sluigi } else { 795262153Sluigi /* return the first non-empty entry starting from 796262153Sluigi * bridge nr_arg1 and port nr_arg2. 797262153Sluigi * 798262153Sluigi * Users can detect the end of the same bridge by 799262153Sluigi * seeing the new and old value of nr_arg1, and can 800262153Sluigi * detect the end of all the bridge by error != 0 801262153Sluigi */ 802262153Sluigi i = nmr->nr_arg1; 803262153Sluigi j = nmr->nr_arg2; 804262153Sluigi 805262153Sluigi NMG_LOCK(); 806262153Sluigi for (error = ENOENT; i < NM_BRIDGES; i++) { 807262153Sluigi b = nm_bridges + i; 808262153Sluigi if (j >= b->bdg_active_ports) { 809262153Sluigi j = 0; /* following bridges scan from 0 */ 810262153Sluigi continue; 811262153Sluigi } 812262153Sluigi nmr->nr_arg1 = i; 813262153Sluigi nmr->nr_arg2 = j; 814262153Sluigi j = b->bdg_port_index[j]; 815262153Sluigi vpna = b->bdg_ports[j]; 816262153Sluigi iter = vpna->up.ifp; 817262153Sluigi strncpy(name, iter->if_xname, (size_t)IFNAMSIZ); 818262153Sluigi error = 0; 819262153Sluigi break; 820262153Sluigi } 821262153Sluigi NMG_UNLOCK(); 822262153Sluigi } 823262153Sluigi break; 824262153Sluigi 825262153Sluigi 
case NETMAP_BDG_LOOKUP_REG: 826262153Sluigi /* register a lookup function to the given bridge. 827262153Sluigi * nmr->nr_name may be just bridge's name (including ':' 828262153Sluigi * if it is not just NM_NAME). 829262153Sluigi */ 830262153Sluigi if (!func) { 831262153Sluigi error = EINVAL; 832262153Sluigi break; 833262153Sluigi } 834262153Sluigi NMG_LOCK(); 835262153Sluigi b = nm_find_bridge(name, 0 /* don't create */); 836262153Sluigi if (!b) { 837262153Sluigi error = EINVAL; 838262153Sluigi } else { 839262153Sluigi b->nm_bdg_lookup = func; 840262153Sluigi } 841262153Sluigi NMG_UNLOCK(); 842262153Sluigi break; 843262153Sluigi 844262153Sluigi case NETMAP_BDG_VNET_HDR: 845262153Sluigi /* Valid lengths for the virtio-net header are 0 (no header), 846262153Sluigi 10 and 12. */ 847262153Sluigi if (nmr->nr_arg1 != 0 && 848262153Sluigi nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) && 849262153Sluigi nmr->nr_arg1 != 12) { 850262153Sluigi error = EINVAL; 851262153Sluigi break; 852262153Sluigi } 853262153Sluigi NMG_LOCK(); 854262153Sluigi error = netmap_get_bdg_na(nmr, &na, 0); 855262153Sluigi if (na && !error) { 856262153Sluigi vpna = (struct netmap_vp_adapter *)na; 857262153Sluigi vpna->virt_hdr_len = nmr->nr_arg1; 858262153Sluigi if (vpna->virt_hdr_len) 859262153Sluigi vpna->mfs = NETMAP_BDG_BUF_SIZE(na->nm_mem); 860262153Sluigi D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna); 861262153Sluigi netmap_adapter_put(na); 862262153Sluigi } 863262153Sluigi NMG_UNLOCK(); 864262153Sluigi break; 865262153Sluigi 866262153Sluigi default: 867262153Sluigi D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd); 868262153Sluigi error = EINVAL; 869262153Sluigi break; 870262153Sluigi } 871262153Sluigi return error; 872262153Sluigi} 873262153Sluigi 874262153Sluigistatic int 875262153Sluiginetmap_vp_krings_create(struct netmap_adapter *na) 876262153Sluigi{ 877262153Sluigi u_int tailroom; 878262153Sluigi int error, i; 879262153Sluigi uint32_t *leases; 880262153Sluigi u_int nrx = 
netmap_real_rx_rings(na); 881262153Sluigi 882262153Sluigi /* 883262153Sluigi * Leases are attached to RX rings on vale ports 884262153Sluigi */ 885262153Sluigi tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx; 886262153Sluigi 887262153Sluigi error = netmap_krings_create(na, tailroom); 888262153Sluigi if (error) 889262153Sluigi return error; 890262153Sluigi 891262153Sluigi leases = na->tailroom; 892262153Sluigi 893262153Sluigi for (i = 0; i < nrx; i++) { /* Receive rings */ 894262153Sluigi na->rx_rings[i].nkr_leases = leases; 895262153Sluigi leases += na->num_rx_desc; 896262153Sluigi } 897262153Sluigi 898262153Sluigi error = nm_alloc_bdgfwd(na); 899262153Sluigi if (error) { 900262153Sluigi netmap_krings_delete(na); 901262153Sluigi return error; 902262153Sluigi } 903262153Sluigi 904262153Sluigi return 0; 905262153Sluigi} 906262153Sluigi 907262153Sluigi 908262153Sluigistatic void 909262153Sluiginetmap_vp_krings_delete(struct netmap_adapter *na) 910262153Sluigi{ 911262153Sluigi nm_free_bdgfwd(na); 912262153Sluigi netmap_krings_delete(na); 913262153Sluigi} 914262153Sluigi 915262153Sluigi 916262153Sluigistatic int 917262153Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, 918262153Sluigi struct netmap_vp_adapter *na, u_int ring_nr); 919262153Sluigi 920262153Sluigi 921262153Sluigi/* 922262153Sluigi * Grab packets from a kring, move them into the ft structure 923262153Sluigi * associated to the tx (input) port. Max one instance per port, 924262153Sluigi * filtered on input (ioctl, poll or XXX). 925262153Sluigi * Returns the next position in the ring. 
926262153Sluigi */ 927262153Sluigistatic int 928262153Sluiginm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr, 929262153Sluigi struct netmap_kring *kring, u_int end) 930262153Sluigi{ 931262153Sluigi struct netmap_ring *ring = kring->ring; 932262153Sluigi struct nm_bdg_fwd *ft; 933262153Sluigi u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1; 934262153Sluigi u_int ft_i = 0; /* start from 0 */ 935262153Sluigi u_int frags = 1; /* how many frags ? */ 936262153Sluigi struct nm_bridge *b = na->na_bdg; 937262153Sluigi 938262153Sluigi /* To protect against modifications to the bridge we acquire a 939262153Sluigi * shared lock, waiting if we can sleep (if the source port is 940262153Sluigi * attached to a user process) or with a trylock otherwise (NICs). 941262153Sluigi */ 942262153Sluigi ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j); 943262153Sluigi if (na->up.na_flags & NAF_BDG_MAYSLEEP) 944262153Sluigi BDG_RLOCK(b); 945262153Sluigi else if (!BDG_RTRYLOCK(b)) 946262153Sluigi return 0; 947262153Sluigi ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j); 948262153Sluigi ft = kring->nkr_ft; 949262153Sluigi 950262153Sluigi for (; likely(j != end); j = nm_next(j, lim)) { 951262153Sluigi struct netmap_slot *slot = &ring->slot[j]; 952262153Sluigi char *buf; 953262153Sluigi 954262153Sluigi ft[ft_i].ft_len = slot->len; 955262153Sluigi ft[ft_i].ft_flags = slot->flags; 956262153Sluigi 957262153Sluigi ND("flags is 0x%x", slot->flags); 958262153Sluigi /* this slot goes into a list so initialize the link field */ 959262153Sluigi ft[ft_i].ft_next = NM_FT_NULL; 960262153Sluigi buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ? 
961262153Sluigi (void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot); 962262153Sluigi __builtin_prefetch(buf); 963262153Sluigi ++ft_i; 964262153Sluigi if (slot->flags & NS_MOREFRAG) { 965262153Sluigi frags++; 966262153Sluigi continue; 967262153Sluigi } 968262153Sluigi if (unlikely(netmap_verbose && frags > 1)) 969262153Sluigi RD(5, "%d frags at %d", frags, ft_i - frags); 970262153Sluigi ft[ft_i - frags].ft_frags = frags; 971262153Sluigi frags = 1; 972262153Sluigi if (unlikely((int)ft_i >= bridge_batch)) 973262153Sluigi ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); 974262153Sluigi } 975262153Sluigi if (frags > 1) { 976262153Sluigi D("truncate incomplete fragment at %d (%d frags)", ft_i, frags); 977262153Sluigi // ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG 978262153Sluigi ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG; 979262153Sluigi ft[ft_i - frags].ft_frags = frags - 1; 980262153Sluigi } 981262153Sluigi if (ft_i) 982262153Sluigi ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); 983262153Sluigi BDG_RUNLOCK(b); 984262153Sluigi return j; 985262153Sluigi} 986262153Sluigi 987262153Sluigi 988262153Sluigi/* ----- FreeBSD if_bridge hash function ------- */ 989262153Sluigi 990262153Sluigi/* 991262153Sluigi * The following hash function is adapted from "Hash Functions" by Bob Jenkins 992262153Sluigi * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). 
993262153Sluigi * 994262153Sluigi * http://www.burtleburtle.net/bob/hash/spooky.html 995262153Sluigi */ 996262153Sluigi#define mix(a, b, c) \ 997262153Sluigido { \ 998262153Sluigi a -= b; a -= c; a ^= (c >> 13); \ 999262153Sluigi b -= c; b -= a; b ^= (a << 8); \ 1000262153Sluigi c -= a; c -= b; c ^= (b >> 13); \ 1001262153Sluigi a -= b; a -= c; a ^= (c >> 12); \ 1002262153Sluigi b -= c; b -= a; b ^= (a << 16); \ 1003262153Sluigi c -= a; c -= b; c ^= (b >> 5); \ 1004262153Sluigi a -= b; a -= c; a ^= (c >> 3); \ 1005262153Sluigi b -= c; b -= a; b ^= (a << 10); \ 1006262153Sluigi c -= a; c -= b; c ^= (b >> 15); \ 1007262153Sluigi} while (/*CONSTCOND*/0) 1008262153Sluigi 1009262153Sluigi 1010262153Sluigistatic __inline uint32_t 1011262153Sluiginm_bridge_rthash(const uint8_t *addr) 1012262153Sluigi{ 1013262153Sluigi uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key 1014262153Sluigi 1015262153Sluigi b += addr[5] << 8; 1016262153Sluigi b += addr[4]; 1017262153Sluigi a += addr[3] << 24; 1018262153Sluigi a += addr[2] << 16; 1019262153Sluigi a += addr[1] << 8; 1020262153Sluigi a += addr[0]; 1021262153Sluigi 1022262153Sluigi mix(a, b, c); 1023262153Sluigi#define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1) 1024262153Sluigi return (c & BRIDGE_RTHASH_MASK); 1025262153Sluigi} 1026262153Sluigi 1027262153Sluigi#undef mix 1028262153Sluigi 1029262153Sluigi 1030262153Sluigistatic int 1031262153Sluigibdg_netmap_reg(struct netmap_adapter *na, int onoff) 1032262153Sluigi{ 1033262153Sluigi struct netmap_vp_adapter *vpna = 1034262153Sluigi (struct netmap_vp_adapter*)na; 1035262153Sluigi struct ifnet *ifp = na->ifp; 1036262153Sluigi 1037262153Sluigi /* the interface is already attached to the bridge, 1038262153Sluigi * so we only need to toggle IFCAP_NETMAP. 
1039262153Sluigi */ 1040262153Sluigi BDG_WLOCK(vpna->na_bdg); 1041262153Sluigi if (onoff) { 1042262153Sluigi ifp->if_capenable |= IFCAP_NETMAP; 1043262153Sluigi } else { 1044262153Sluigi ifp->if_capenable &= ~IFCAP_NETMAP; 1045262153Sluigi } 1046262153Sluigi BDG_WUNLOCK(vpna->na_bdg); 1047262153Sluigi return 0; 1048262153Sluigi} 1049262153Sluigi 1050262153Sluigi 1051262153Sluigi/* 1052262153Sluigi * Lookup function for a learning bridge. 1053262153Sluigi * Update the hash table with the source address, 1054262153Sluigi * and then returns the destination port index, and the 1055262153Sluigi * ring in *dst_ring (at the moment, always use ring 0) 1056262153Sluigi */ 1057262153Sluigiu_int 1058262153Sluiginetmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring, 1059262153Sluigi struct netmap_vp_adapter *na) 1060262153Sluigi{ 1061262153Sluigi struct nm_hash_ent *ht = na->na_bdg->ht; 1062262153Sluigi uint32_t sh, dh; 1063262153Sluigi u_int dst, mysrc = na->bdg_port; 1064262153Sluigi uint64_t smac, dmac; 1065262153Sluigi 1066262153Sluigi if (buf_len < 14) { 1067262153Sluigi D("invalid buf length %d", buf_len); 1068262153Sluigi return NM_BDG_NOPORT; 1069262153Sluigi } 1070262153Sluigi dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff; 1071262153Sluigi smac = le64toh(*(uint64_t *)(buf + 4)); 1072262153Sluigi smac >>= 16; 1073262153Sluigi 1074262153Sluigi /* 1075262153Sluigi * The hash is somewhat expensive, there might be some 1076262153Sluigi * worthwhile optimizations here. 1077262153Sluigi */ 1078262153Sluigi if ((buf[6] & 1) == 0) { /* valid src */ 1079262153Sluigi uint8_t *s = buf+6; 1080262153Sluigi sh = nm_bridge_rthash(s); // XXX hash of source 1081262153Sluigi /* update source port forwarding entry */ 1082262153Sluigi ht[sh].mac = smac; /* XXX expire ? 
*/ 1083262153Sluigi ht[sh].ports = mysrc; 1084262153Sluigi if (netmap_verbose) 1085262153Sluigi D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d", 1086262153Sluigi s[0], s[1], s[2], s[3], s[4], s[5], mysrc); 1087262153Sluigi } 1088262153Sluigi dst = NM_BDG_BROADCAST; 1089262153Sluigi if ((buf[0] & 1) == 0) { /* unicast */ 1090262153Sluigi dh = nm_bridge_rthash(buf); // XXX hash of dst 1091262153Sluigi if (ht[dh].mac == dmac) { /* found dst */ 1092262153Sluigi dst = ht[dh].ports; 1093262153Sluigi } 1094262153Sluigi /* XXX otherwise return NM_BDG_UNKNOWN ? */ 1095262153Sluigi } 1096262153Sluigi *dst_ring = 0; 1097262153Sluigi return dst; 1098262153Sluigi} 1099262153Sluigi 1100262153Sluigi 1101262153Sluigi/* 1102262153Sluigi * Available space in the ring. Only used in VALE code 1103262153Sluigi * and only with is_rx = 1 1104262153Sluigi */ 1105262153Sluigistatic inline uint32_t 1106262153Sluiginm_kr_space(struct netmap_kring *k, int is_rx) 1107262153Sluigi{ 1108262153Sluigi int space; 1109262153Sluigi 1110262153Sluigi if (is_rx) { 1111262153Sluigi int busy = k->nkr_hwlease - k->nr_hwcur; 1112262153Sluigi if (busy < 0) 1113262153Sluigi busy += k->nkr_num_slots; 1114262153Sluigi space = k->nkr_num_slots - 1 - busy; 1115262153Sluigi } else { 1116262153Sluigi /* XXX never used in this branch */ 1117262153Sluigi space = k->nr_hwtail - k->nkr_hwlease; 1118262153Sluigi if (space < 0) 1119262153Sluigi space += k->nkr_num_slots; 1120262153Sluigi } 1121262153Sluigi#if 0 1122262153Sluigi // sanity check 1123262153Sluigi if (k->nkr_hwlease >= k->nkr_num_slots || 1124262153Sluigi k->nr_hwcur >= k->nkr_num_slots || 1125262153Sluigi k->nr_tail >= k->nkr_num_slots || 1126262153Sluigi busy < 0 || 1127262153Sluigi busy >= k->nkr_num_slots) { 1128262153Sluigi D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 1129262153Sluigi k->nkr_lease_idx, k->nkr_num_slots); 1130262153Sluigi } 1131262153Sluigi#endif 1132262153Sluigi return 
space; 1133262153Sluigi} 1134262153Sluigi 1135262153Sluigi 1136262153Sluigi 1137262153Sluigi 1138262153Sluigi/* make a lease on the kring for N positions. return the 1139262153Sluigi * lease index 1140262153Sluigi * XXX only used in VALE code and with is_rx = 1 1141262153Sluigi */ 1142262153Sluigistatic inline uint32_t 1143262153Sluiginm_kr_lease(struct netmap_kring *k, u_int n, int is_rx) 1144262153Sluigi{ 1145262153Sluigi uint32_t lim = k->nkr_num_slots - 1; 1146262153Sluigi uint32_t lease_idx = k->nkr_lease_idx; 1147262153Sluigi 1148262153Sluigi k->nkr_leases[lease_idx] = NR_NOSLOT; 1149262153Sluigi k->nkr_lease_idx = nm_next(lease_idx, lim); 1150262153Sluigi 1151262153Sluigi if (n > nm_kr_space(k, is_rx)) { 1152262153Sluigi D("invalid request for %d slots", n); 1153262153Sluigi panic("x"); 1154262153Sluigi } 1155262153Sluigi /* XXX verify that there are n slots */ 1156262153Sluigi k->nkr_hwlease += n; 1157262153Sluigi if (k->nkr_hwlease > lim) 1158262153Sluigi k->nkr_hwlease -= lim + 1; 1159262153Sluigi 1160262153Sluigi if (k->nkr_hwlease >= k->nkr_num_slots || 1161262153Sluigi k->nr_hwcur >= k->nkr_num_slots || 1162262153Sluigi k->nr_hwtail >= k->nkr_num_slots || 1163262153Sluigi k->nkr_lease_idx >= k->nkr_num_slots) { 1164262153Sluigi D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d", 1165262153Sluigi k->na->ifp->if_xname, 1166262153Sluigi k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 1167262153Sluigi k->nkr_lease_idx, k->nkr_num_slots); 1168262153Sluigi } 1169262153Sluigi return lease_idx; 1170262153Sluigi} 1171262153Sluigi 1172262153Sluigi/* 1173262153Sluigi * This flush routine supports only unicast and broadcast but a large 1174262153Sluigi * number of ports, and lets us replace the learn and dispatch functions. 
1175262153Sluigi */ 1176262153Sluigiint 1177262153Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, 1178262153Sluigi u_int ring_nr) 1179262153Sluigi{ 1180262153Sluigi struct nm_bdg_q *dst_ents, *brddst; 1181262153Sluigi uint16_t num_dsts = 0, *dsts; 1182262153Sluigi struct nm_bridge *b = na->na_bdg; 1183262153Sluigi u_int i, j, me = na->bdg_port; 1184262153Sluigi 1185262153Sluigi /* 1186262153Sluigi * The work area (pointed by ft) is followed by an array of 1187262153Sluigi * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS 1188262153Sluigi * queues per port plus one for the broadcast traffic. 1189262153Sluigi * Then we have an array of destination indexes. 1190262153Sluigi */ 1191262153Sluigi dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); 1192262153Sluigi dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1); 1193262153Sluigi 1194262153Sluigi /* first pass: find a destination for each packet in the batch */ 1195262153Sluigi for (i = 0; likely(i < n); i += ft[i].ft_frags) { 1196262153Sluigi uint8_t dst_ring = ring_nr; /* default, same ring as origin */ 1197262153Sluigi uint16_t dst_port, d_i; 1198262153Sluigi struct nm_bdg_q *d; 1199262153Sluigi uint8_t *buf = ft[i].ft_buf; 1200262153Sluigi u_int len = ft[i].ft_len; 1201262153Sluigi 1202262153Sluigi ND("slot %d frags %d", i, ft[i].ft_frags); 1203262153Sluigi /* Drop the packet if the virtio-net header is not into the first 1204262153Sluigi fragment nor at the very beginning of the second. 
*/ 1205262153Sluigi if (unlikely(na->virt_hdr_len > len)) 1206262153Sluigi continue; 1207262153Sluigi if (len == na->virt_hdr_len) { 1208262153Sluigi buf = ft[i+1].ft_buf; 1209262153Sluigi len = ft[i+1].ft_len; 1210262153Sluigi } else { 1211262153Sluigi buf += na->virt_hdr_len; 1212262153Sluigi len -= na->virt_hdr_len; 1213262153Sluigi } 1214262153Sluigi dst_port = b->nm_bdg_lookup(buf, len, &dst_ring, na); 1215262153Sluigi if (netmap_verbose > 255) 1216262153Sluigi RD(5, "slot %d port %d -> %d", i, me, dst_port); 1217262153Sluigi if (dst_port == NM_BDG_NOPORT) 1218262153Sluigi continue; /* this packet is identified to be dropped */ 1219262153Sluigi else if (unlikely(dst_port > NM_BDG_MAXPORTS)) 1220262153Sluigi continue; 1221262153Sluigi else if (dst_port == NM_BDG_BROADCAST) 1222262153Sluigi dst_ring = 0; /* broadcasts always go to ring 0 */ 1223262153Sluigi else if (unlikely(dst_port == me || 1224262153Sluigi !b->bdg_ports[dst_port])) 1225262153Sluigi continue; 1226262153Sluigi 1227262153Sluigi /* get a position in the scratch pad */ 1228262153Sluigi d_i = dst_port * NM_BDG_MAXRINGS + dst_ring; 1229262153Sluigi d = dst_ents + d_i; 1230262153Sluigi 1231262153Sluigi /* append the first fragment to the list */ 1232262153Sluigi if (d->bq_head == NM_FT_NULL) { /* new destination */ 1233262153Sluigi d->bq_head = d->bq_tail = i; 1234262153Sluigi /* remember this position to be scanned later */ 1235262153Sluigi if (dst_port != NM_BDG_BROADCAST) 1236262153Sluigi dsts[num_dsts++] = d_i; 1237262153Sluigi } else { 1238262153Sluigi ft[d->bq_tail].ft_next = i; 1239262153Sluigi d->bq_tail = i; 1240262153Sluigi } 1241262153Sluigi d->bq_len += ft[i].ft_frags; 1242262153Sluigi } 1243262153Sluigi 1244262153Sluigi /* 1245262153Sluigi * Broadcast traffic goes to ring 0 on all destinations. 1246262153Sluigi * So we need to add these rings to the list of ports to scan. 1247262153Sluigi * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is 1248262153Sluigi * expensive. 
We should keep a compact list of active destinations 1249262153Sluigi * so we could shorten this loop. 1250262153Sluigi */ 1251262153Sluigi brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS; 1252262153Sluigi if (brddst->bq_head != NM_FT_NULL) { 1253262153Sluigi for (j = 0; likely(j < b->bdg_active_ports); j++) { 1254262153Sluigi uint16_t d_i; 1255262153Sluigi i = b->bdg_port_index[j]; 1256262153Sluigi if (unlikely(i == me)) 1257262153Sluigi continue; 1258262153Sluigi d_i = i * NM_BDG_MAXRINGS; 1259262153Sluigi if (dst_ents[d_i].bq_head == NM_FT_NULL) 1260262153Sluigi dsts[num_dsts++] = d_i; 1261262153Sluigi } 1262262153Sluigi } 1263262153Sluigi 1264262153Sluigi ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts); 1265262153Sluigi /* second pass: scan destinations (XXX will be modular somehow) */ 1266262153Sluigi for (i = 0; i < num_dsts; i++) { 1267262153Sluigi struct ifnet *dst_ifp; 1268262153Sluigi struct netmap_vp_adapter *dst_na; 1269262153Sluigi struct netmap_kring *kring; 1270262153Sluigi struct netmap_ring *ring; 1271262153Sluigi u_int dst_nr, lim, j, d_i, next, brd_next; 1272262153Sluigi u_int needed, howmany; 1273262153Sluigi int retry = netmap_txsync_retry; 1274262153Sluigi struct nm_bdg_q *d; 1275262153Sluigi uint32_t my_start = 0, lease_idx = 0; 1276262153Sluigi int nrings; 1277262153Sluigi int virt_hdr_mismatch = 0; 1278262153Sluigi 1279262153Sluigi d_i = dsts[i]; 1280262153Sluigi ND("second pass %d port %d", i, d_i); 1281262153Sluigi d = dst_ents + d_i; 1282262153Sluigi // XXX fix the division 1283262153Sluigi dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS]; 1284262153Sluigi /* protect from the lookup function returning an inactive 1285262153Sluigi * destination port 1286262153Sluigi */ 1287262153Sluigi if (unlikely(dst_na == NULL)) 1288262153Sluigi goto cleanup; 1289262153Sluigi if (dst_na->up.na_flags & NAF_SW_ONLY) 1290262153Sluigi goto cleanup; 1291262153Sluigi dst_ifp = dst_na->up.ifp; 1292262153Sluigi /* 1293262153Sluigi * The interface may be in 
!netmap mode in two cases: 1294262153Sluigi * - when na is attached but not activated yet; 1295262153Sluigi * - when na is being deactivated but is still attached. 1296262153Sluigi */ 1297262153Sluigi if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) { 1298262153Sluigi ND("not in netmap mode!"); 1299262153Sluigi goto cleanup; 1300262153Sluigi } 1301262153Sluigi 1302262153Sluigi /* there is at least one either unicast or broadcast packet */ 1303262153Sluigi brd_next = brddst->bq_head; 1304262153Sluigi next = d->bq_head; 1305262153Sluigi /* we need to reserve this many slots. If fewer are 1306262153Sluigi * available, some packets will be dropped. 1307262153Sluigi * Packets may have multiple fragments, so we may not use 1308262153Sluigi * there is a chance that we may not use all of the slots 1309262153Sluigi * we have claimed, so we will need to handle the leftover 1310262153Sluigi * ones when we regain the lock. 1311262153Sluigi */ 1312262153Sluigi needed = d->bq_len + brddst->bq_len; 1313262153Sluigi 1314262153Sluigi if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) { 1315262153Sluigi /* There is a virtio-net header/offloadings mismatch between 1316262153Sluigi * source and destination. The slower mismatch datapath will 1317262153Sluigi * be used to cope with all the mismatches. 1318262153Sluigi */ 1319262153Sluigi virt_hdr_mismatch = 1; 1320262153Sluigi if (dst_na->mfs < na->mfs) { 1321262153Sluigi /* We may need to do segmentation offloadings, and so 1322262153Sluigi * we may need a number of destination slots greater 1323262153Sluigi * than the number of input slots ('needed'). 1324262153Sluigi * We look for the smallest integer 'x' which satisfies: 1325262153Sluigi * needed * na->mfs + x * H <= x * na->mfs 1326262153Sluigi * where 'H' is the length of the longest header that may 1327262153Sluigi * be replicated in the segmentation process (e.g. 
for 1328262153Sluigi * TCPv4 we must account for ethernet header, IP header 1329262153Sluigi * and TCPv4 header). 1330262153Sluigi */ 1331262153Sluigi needed = (needed * na->mfs) / 1332262153Sluigi (dst_na->mfs - WORST_CASE_GSO_HEADER) + 1; 1333262153Sluigi ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed); 1334262153Sluigi } 1335262153Sluigi } 1336262153Sluigi 1337262153Sluigi ND(5, "pass 2 dst %d is %x %s", 1338262153Sluigi i, d_i, is_vp ? "virtual" : "nic/host"); 1339262153Sluigi dst_nr = d_i & (NM_BDG_MAXRINGS-1); 1340262153Sluigi nrings = dst_na->up.num_rx_rings; 1341262153Sluigi if (dst_nr >= nrings) 1342262153Sluigi dst_nr = dst_nr % nrings; 1343262153Sluigi kring = &dst_na->up.rx_rings[dst_nr]; 1344262153Sluigi ring = kring->ring; 1345262153Sluigi lim = kring->nkr_num_slots - 1; 1346262153Sluigi 1347262153Sluigiretry: 1348262153Sluigi 1349262153Sluigi if (dst_na->retry && retry) { 1350262153Sluigi /* try to get some free slot from the previous run */ 1351262153Sluigi dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0); 1352262153Sluigi } 1353262153Sluigi /* reserve the buffers in the queue and an entry 1354262153Sluigi * to report completion, and drop lock. 1355262153Sluigi * XXX this might become a helper function. 
1356262153Sluigi */ 1357262153Sluigi mtx_lock(&kring->q_lock); 1358262153Sluigi if (kring->nkr_stopped) { 1359262153Sluigi mtx_unlock(&kring->q_lock); 1360262153Sluigi goto cleanup; 1361262153Sluigi } 1362262153Sluigi my_start = j = kring->nkr_hwlease; 1363262153Sluigi howmany = nm_kr_space(kring, 1); 1364262153Sluigi if (needed < howmany) 1365262153Sluigi howmany = needed; 1366262153Sluigi lease_idx = nm_kr_lease(kring, howmany, 1); 1367262153Sluigi mtx_unlock(&kring->q_lock); 1368262153Sluigi 1369262153Sluigi /* only retry if we need more than available slots */ 1370262153Sluigi if (retry && needed <= howmany) 1371262153Sluigi retry = 0; 1372262153Sluigi 1373262153Sluigi /* copy to the destination queue */ 1374262153Sluigi while (howmany > 0) { 1375262153Sluigi struct netmap_slot *slot; 1376262153Sluigi struct nm_bdg_fwd *ft_p, *ft_end; 1377262153Sluigi u_int cnt; 1378262153Sluigi 1379262153Sluigi /* find the queue from which we pick next packet. 1380262153Sluigi * NM_FT_NULL is always higher than valid indexes 1381262153Sluigi * so we never dereference it if the other list 1382262153Sluigi * has packets (and if both are empty we never 1383262153Sluigi * get here). 
1384262153Sluigi */ 1385262153Sluigi if (next < brd_next) { 1386262153Sluigi ft_p = ft + next; 1387262153Sluigi next = ft_p->ft_next; 1388262153Sluigi } else { /* insert broadcast */ 1389262153Sluigi ft_p = ft + brd_next; 1390262153Sluigi brd_next = ft_p->ft_next; 1391262153Sluigi } 1392262153Sluigi cnt = ft_p->ft_frags; // cnt > 0 1393262153Sluigi if (unlikely(cnt > howmany)) 1394262153Sluigi break; /* no more space */ 1395262153Sluigi if (netmap_verbose && cnt > 1) 1396262153Sluigi RD(5, "rx %d frags to %d", cnt, j); 1397262153Sluigi ft_end = ft_p + cnt; 1398262153Sluigi if (unlikely(virt_hdr_mismatch)) { 1399262153Sluigi bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany); 1400262153Sluigi } else { 1401262153Sluigi howmany -= cnt; 1402262153Sluigi do { 1403262153Sluigi char *dst, *src = ft_p->ft_buf; 1404262153Sluigi size_t copy_len = ft_p->ft_len, dst_len = copy_len; 1405262153Sluigi 1406262153Sluigi slot = &ring->slot[j]; 1407262153Sluigi dst = BDG_NMB(&dst_na->up, slot); 1408262153Sluigi 1409262153Sluigi ND("send [%d] %d(%d) bytes at %s:%d", 1410262153Sluigi i, (int)copy_len, (int)dst_len, 1411262153Sluigi NM_IFPNAME(dst_ifp), j); 1412262153Sluigi /* round to a multiple of 64 */ 1413262153Sluigi copy_len = (copy_len + 63) & ~63; 1414262153Sluigi 1415262153Sluigi if (ft_p->ft_flags & NS_INDIRECT) { 1416262153Sluigi if (copyin(src, dst, copy_len)) { 1417262153Sluigi // invalid user pointer, pretend len is 0 1418262153Sluigi dst_len = 0; 1419262153Sluigi } 1420262153Sluigi } else { 1421262153Sluigi //memcpy(dst, src, copy_len); 1422262153Sluigi pkt_copy(src, dst, (int)copy_len); 1423262153Sluigi } 1424262153Sluigi slot->len = dst_len; 1425262153Sluigi slot->flags = (cnt << 8)| NS_MOREFRAG; 1426262153Sluigi j = nm_next(j, lim); 1427262153Sluigi needed--; 1428262153Sluigi ft_p++; 1429262153Sluigi } while (ft_p != ft_end); 1430262153Sluigi slot->flags = (cnt << 8); /* clear flag on last entry */ 1431262153Sluigi } 1432262153Sluigi /* are we done ? 
*/ 1433262153Sluigi if (next == NM_FT_NULL && brd_next == NM_FT_NULL) 1434262153Sluigi break; 1435262153Sluigi } 1436262153Sluigi { 1437262153Sluigi /* current position */ 1438262153Sluigi uint32_t *p = kring->nkr_leases; /* shorthand */ 1439262153Sluigi uint32_t update_pos; 1440262153Sluigi int still_locked = 1; 1441262153Sluigi 1442262153Sluigi mtx_lock(&kring->q_lock); 1443262153Sluigi if (unlikely(howmany > 0)) { 1444262153Sluigi /* not used all bufs. If i am the last one 1445262153Sluigi * i can recover the slots, otherwise must 1446262153Sluigi * fill them with 0 to mark empty packets. 1447262153Sluigi */ 1448262153Sluigi ND("leftover %d bufs", howmany); 1449262153Sluigi if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) { 1450262153Sluigi /* yes i am the last one */ 1451262153Sluigi ND("roll back nkr_hwlease to %d", j); 1452262153Sluigi kring->nkr_hwlease = j; 1453262153Sluigi } else { 1454262153Sluigi while (howmany-- > 0) { 1455262153Sluigi ring->slot[j].len = 0; 1456262153Sluigi ring->slot[j].flags = 0; 1457262153Sluigi j = nm_next(j, lim); 1458262153Sluigi } 1459262153Sluigi } 1460262153Sluigi } 1461262153Sluigi p[lease_idx] = j; /* report I am done */ 1462262153Sluigi 1463262153Sluigi update_pos = kring->nr_hwtail; 1464262153Sluigi 1465262153Sluigi if (my_start == update_pos) { 1466262153Sluigi /* all slots before my_start have been reported, 1467262153Sluigi * so scan subsequent leases to see if other ranges 1468262153Sluigi * have been completed, and to a selwakeup or txsync. 1469262153Sluigi */ 1470262153Sluigi while (lease_idx != kring->nkr_lease_idx && 1471262153Sluigi p[lease_idx] != NR_NOSLOT) { 1472262153Sluigi j = p[lease_idx]; 1473262153Sluigi p[lease_idx] = NR_NOSLOT; 1474262153Sluigi lease_idx = nm_next(lease_idx, lim); 1475262153Sluigi } 1476262153Sluigi /* j is the new 'write' position. 
j != my_start 1477262153Sluigi * means there are new buffers to report 1478262153Sluigi */ 1479262153Sluigi if (likely(j != my_start)) { 1480262153Sluigi kring->nr_hwtail = j; 1481262153Sluigi still_locked = 0; 1482262153Sluigi mtx_unlock(&kring->q_lock); 1483262153Sluigi dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0); 1484262153Sluigi if (dst_na->retry && retry--) 1485262153Sluigi goto retry; 1486262153Sluigi } 1487262153Sluigi } 1488262153Sluigi if (still_locked) 1489262153Sluigi mtx_unlock(&kring->q_lock); 1490262153Sluigi } 1491262153Sluigicleanup: 1492262153Sluigi d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */ 1493262153Sluigi d->bq_len = 0; 1494262153Sluigi } 1495262153Sluigi brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */ 1496262153Sluigi brddst->bq_len = 0; 1497262153Sluigi return 0; 1498262153Sluigi} 1499262153Sluigi 1500262153Sluigi 1501262153Sluigistatic int 1502262153Sluiginetmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags) 1503262153Sluigi{ 1504262153Sluigi struct netmap_kring *kring = &na->up.tx_rings[ring_nr]; 1505262153Sluigi u_int done; 1506262153Sluigi u_int const lim = kring->nkr_num_slots - 1; 1507262153Sluigi u_int const cur = kring->rcur; 1508262153Sluigi 1509262153Sluigi if (bridge_batch <= 0) { /* testing only */ 1510262153Sluigi done = cur; // used all 1511262153Sluigi goto done; 1512262153Sluigi } 1513262153Sluigi if (bridge_batch > NM_BDG_BATCH) 1514262153Sluigi bridge_batch = NM_BDG_BATCH; 1515262153Sluigi 1516262153Sluigi done = nm_bdg_preflush(na, ring_nr, kring, cur); 1517262153Sluigidone: 1518262153Sluigi if (done != cur) 1519262153Sluigi D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail); 1520262153Sluigi /* 1521262153Sluigi * packets between 'done' and 'cur' are left unsent. 
1522262153Sluigi */ 1523262153Sluigi kring->nr_hwcur = done; 1524262153Sluigi kring->nr_hwtail = nm_prev(done, lim); 1525262153Sluigi nm_txsync_finalize(kring); 1526262153Sluigi if (netmap_verbose) 1527262153Sluigi D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags); 1528262153Sluigi return 0; 1529262153Sluigi} 1530262153Sluigi 1531262153Sluigi 1532262153Sluigi/* 1533262153Sluigi * main dispatch routine for the bridge. 1534262153Sluigi * We already know that only one thread is running this. 1535262153Sluigi * we must run nm_bdg_preflush without lock. 1536262153Sluigi */ 1537262153Sluigistatic int 1538262153Sluigibdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) 1539262153Sluigi{ 1540262153Sluigi struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na; 1541262153Sluigi return netmap_vp_txsync(vpna, ring_nr, flags); 1542262153Sluigi} 1543262153Sluigi 1544262153Sluigistatic int 1545262153Sluiginetmap_vp_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) 1546262153Sluigi{ 1547262153Sluigi struct netmap_kring *kring = &na->rx_rings[ring_nr]; 1548262153Sluigi struct netmap_ring *ring = kring->ring; 1549262153Sluigi u_int nm_i, lim = kring->nkr_num_slots - 1; 1550262153Sluigi u_int head = nm_rxsync_prologue(kring); 1551262153Sluigi int n; 1552262153Sluigi 1553262153Sluigi if (head > lim) { 1554262153Sluigi D("ouch dangerous reset!!!"); 1555262153Sluigi n = netmap_ring_reinit(kring); 1556262153Sluigi goto done; 1557262153Sluigi } 1558262153Sluigi 1559262153Sluigi /* First part, import newly received packets. */ 1560262153Sluigi /* actually nothing to do here, they are already in the kring */ 1561262153Sluigi 1562262153Sluigi /* Second part, skip past packets that userspace has released. 
*/ 1563262153Sluigi nm_i = kring->nr_hwcur; 1564262153Sluigi if (nm_i != head) { 1565262153Sluigi /* consistency check, but nothing really important here */ 1566262153Sluigi for (n = 0; likely(nm_i != head); n++) { 1567262153Sluigi struct netmap_slot *slot = &ring->slot[nm_i]; 1568262153Sluigi void *addr = BDG_NMB(na, slot); 1569262153Sluigi 1570262153Sluigi if (addr == netmap_buffer_base) { /* bad buf */ 1571262153Sluigi D("bad buffer index %d, ignore ?", 1572262153Sluigi slot->buf_idx); 1573262153Sluigi } 1574262153Sluigi slot->flags &= ~NS_BUF_CHANGED; 1575262153Sluigi nm_i = nm_next(nm_i, lim); 1576262153Sluigi } 1577262153Sluigi kring->nr_hwcur = head; 1578262153Sluigi } 1579262153Sluigi 1580262153Sluigi /* tell userspace that there are new packets */ 1581262153Sluigi nm_rxsync_finalize(kring); 1582262153Sluigi n = 0; 1583262153Sluigidone: 1584262153Sluigi return n; 1585262153Sluigi} 1586262153Sluigi 1587262153Sluigi/* 1588262153Sluigi * user process reading from a VALE switch. 1589262153Sluigi * Already protected against concurrent calls from userspace, 1590262153Sluigi * but we must acquire the queue's lock to protect against 1591262153Sluigi * writers on the same queue. 
1592262153Sluigi */ 1593262153Sluigistatic int 1594262153Sluigibdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) 1595262153Sluigi{ 1596262153Sluigi struct netmap_kring *kring = &na->rx_rings[ring_nr]; 1597262153Sluigi int n; 1598262153Sluigi 1599262153Sluigi mtx_lock(&kring->q_lock); 1600262153Sluigi n = netmap_vp_rxsync(na, ring_nr, flags); 1601262153Sluigi mtx_unlock(&kring->q_lock); 1602262153Sluigi return n; 1603262153Sluigi} 1604262153Sluigi 1605262153Sluigi 1606262153Sluigistatic int 1607262153Sluigibdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp) 1608262153Sluigi{ 1609262153Sluigi struct netmap_vp_adapter *vpna; 1610262153Sluigi struct netmap_adapter *na; 1611262153Sluigi int error; 1612262153Sluigi u_int npipes = 0; 1613262153Sluigi 1614262153Sluigi vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO); 1615262153Sluigi if (vpna == NULL) 1616262153Sluigi return ENOMEM; 1617262153Sluigi 1618262153Sluigi na = &vpna->up; 1619262153Sluigi 1620262153Sluigi na->ifp = ifp; 1621262153Sluigi 1622262153Sluigi /* bound checking */ 1623262153Sluigi na->num_tx_rings = nmr->nr_tx_rings; 1624262153Sluigi nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 1625262153Sluigi nmr->nr_tx_rings = na->num_tx_rings; // write back 1626262153Sluigi na->num_rx_rings = nmr->nr_rx_rings; 1627262153Sluigi nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 1628262153Sluigi nmr->nr_rx_rings = na->num_rx_rings; // write back 1629262153Sluigi nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE, 1630262153Sluigi 1, NM_BDG_MAXSLOTS, NULL); 1631262153Sluigi na->num_tx_desc = nmr->nr_tx_slots; 1632262153Sluigi nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE, 1633262153Sluigi 1, NM_BDG_MAXSLOTS, NULL); 1634262153Sluigi /* validate number of pipes. We want at least 1, 1635262153Sluigi * but probably can do with some more. 
1636262153Sluigi * So let's use 2 as default (when 0 is supplied) 1637262153Sluigi */ 1638262153Sluigi npipes = nmr->nr_arg1; 1639262153Sluigi nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL); 1640262153Sluigi nmr->nr_arg1 = npipes; /* write back */ 1641262153Sluigi /* validate extra bufs */ 1642262153Sluigi nm_bound_var(&nmr->nr_arg3, 0, 0, 1643262153Sluigi 128*NM_BDG_MAXSLOTS, NULL); 1644262153Sluigi na->num_rx_desc = nmr->nr_rx_slots; 1645262153Sluigi vpna->virt_hdr_len = 0; 1646262153Sluigi vpna->mfs = 1514; 1647262153Sluigi /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero?? 1648262153Sluigi vpna->mfs = netmap_buf_size; */ 1649262153Sluigi if (netmap_verbose) 1650262153Sluigi D("max frame size %u", vpna->mfs); 1651262153Sluigi 1652262153Sluigi na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER; 1653262153Sluigi na->nm_txsync = bdg_netmap_txsync; 1654262153Sluigi na->nm_rxsync = bdg_netmap_rxsync; 1655262153Sluigi na->nm_register = bdg_netmap_reg; 1656262153Sluigi na->nm_dtor = netmap_adapter_vp_dtor; 1657262153Sluigi na->nm_krings_create = netmap_vp_krings_create; 1658262153Sluigi na->nm_krings_delete = netmap_vp_krings_delete; 1659262153Sluigi na->nm_mem = netmap_mem_private_new(NM_IFPNAME(na->ifp), 1660262153Sluigi na->num_tx_rings, na->num_tx_desc, 1661262153Sluigi na->num_rx_rings, na->num_rx_desc, 1662262153Sluigi nmr->nr_arg3, npipes, &error); 1663262153Sluigi if (na->nm_mem == NULL) 1664262153Sluigi goto err; 1665262153Sluigi /* other nmd fields are set in the common routine */ 1666262153Sluigi error = netmap_attach_common(na); 1667262153Sluigi if (error) 1668262153Sluigi goto err; 1669262153Sluigi return 0; 1670262153Sluigi 1671262153Sluigierr: 1672262153Sluigi if (na->nm_mem != NULL) 1673262153Sluigi netmap_mem_private_delete(na->nm_mem); 1674262153Sluigi free(vpna, M_DEVBUF); 1675262153Sluigi return error; 1676262153Sluigi} 1677262153Sluigi 1678262153Sluigi 1679262153Sluigistatic void 1680262153Sluiginetmap_bwrap_dtor(struct netmap_adapter 
*na) 1681262153Sluigi{ 1682262153Sluigi struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; 1683262153Sluigi struct netmap_adapter *hwna = bna->hwna; 1684262153Sluigi struct nm_bridge *b = bna->up.na_bdg, 1685262153Sluigi *bh = bna->host.na_bdg; 1686262153Sluigi struct ifnet *ifp = na->ifp; 1687262153Sluigi 1688262153Sluigi ND("na %p", na); 1689262153Sluigi 1690262153Sluigi if (b) { 1691262153Sluigi netmap_bdg_detach_common(b, bna->up.bdg_port, 1692262153Sluigi (bh ? bna->host.bdg_port : -1)); 1693262153Sluigi } 1694262153Sluigi 1695262153Sluigi hwna->na_private = NULL; 1696262153Sluigi netmap_adapter_put(hwna); 1697262153Sluigi 1698262153Sluigi bzero(ifp, sizeof(*ifp)); 1699262153Sluigi free(ifp, M_DEVBUF); 1700262153Sluigi na->ifp = NULL; 1701262153Sluigi 1702262153Sluigi} 1703262153Sluigi 1704262153Sluigi 1705262153Sluigi/* 1706262153Sluigi * Intr callback for NICs connected to a bridge. 1707262153Sluigi * Simply ignore tx interrupts (maybe we could try to recover space ?) 1708262153Sluigi * and pass received packets from nic to the bridge. 1709262153Sluigi * 1710262153Sluigi * XXX TODO check locking: this is called from the interrupt 1711262153Sluigi * handler so we should make sure that the interface is not 1712262153Sluigi * disconnected while passing down an interrupt. 1713262153Sluigi * 1714262153Sluigi * Note, no user process can access this NIC or the host stack. 1715262153Sluigi * The only part of the ring that is significant are the slots, 1716262153Sluigi * and head/cur/tail are set from the kring as needed 1717262153Sluigi * (part as a receive ring, part as a transmit ring). 1718262153Sluigi * 1719262153Sluigi * callback that overwrites the hwna notify callback. 1720262153Sluigi * Packets come from the outside or from the host stack and are put on an hwna rx ring. 1721262153Sluigi * The bridge wrapper then sends the packets through the bridge. 
1722262153Sluigi */ 1723262153Sluigistatic int 1724262153Sluiginetmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags) 1725262153Sluigi{ 1726262153Sluigi struct ifnet *ifp = na->ifp; 1727262153Sluigi struct netmap_bwrap_adapter *bna = na->na_private; 1728262153Sluigi struct netmap_vp_adapter *hostna = &bna->host; 1729262153Sluigi struct netmap_kring *kring, *bkring; 1730262153Sluigi struct netmap_ring *ring; 1731262153Sluigi int is_host_ring = ring_nr == na->num_rx_rings; 1732262153Sluigi struct netmap_vp_adapter *vpna = &bna->up; 1733262153Sluigi int error = 0; 1734262153Sluigi 1735262153Sluigi if (netmap_verbose) 1736262153Sluigi D("%s %s%d 0x%x", NM_IFPNAME(ifp), 1737262153Sluigi (tx == NR_TX ? "TX" : "RX"), ring_nr, flags); 1738262153Sluigi 1739262153Sluigi if (flags & NAF_DISABLE_NOTIFY) { 1740262153Sluigi kring = tx == NR_TX ? na->tx_rings : na->rx_rings; 1741262153Sluigi bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings; 1742262153Sluigi if (kring[ring_nr].nkr_stopped) 1743262153Sluigi netmap_disable_ring(&bkring[ring_nr]); 1744262153Sluigi else 1745262153Sluigi bkring[ring_nr].nkr_stopped = 0; 1746262153Sluigi return 0; 1747262153Sluigi } 1748262153Sluigi 1749262153Sluigi if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP)) 1750262153Sluigi return 0; 1751262153Sluigi 1752262153Sluigi /* we only care about receive interrupts */ 1753262153Sluigi if (tx == NR_TX) 1754262153Sluigi return 0; 1755262153Sluigi 1756262153Sluigi kring = &na->rx_rings[ring_nr]; 1757262153Sluigi ring = kring->ring; 1758262153Sluigi 1759262153Sluigi /* make sure the ring is not disabled */ 1760262153Sluigi if (nm_kr_tryget(kring)) 1761262153Sluigi return 0; 1762262153Sluigi 1763262153Sluigi if (is_host_ring && hostna->na_bdg == NULL) { 1764262153Sluigi error = bna->save_notify(na, ring_nr, tx, flags); 1765262153Sluigi goto put_out; 1766262153Sluigi } 1767262153Sluigi 1768262153Sluigi /* Here we expect ring->head = ring->cur = ring->tail 
1769262153Sluigi * because everything has been released from the previous round. 1770262153Sluigi * However the ring is shared and we might have info from 1771262153Sluigi * the wrong side (the tx ring). Hence we overwrite with 1772262153Sluigi * the info from the rx kring. 1773262153Sluigi */ 1774262153Sluigi if (netmap_verbose) 1775262153Sluigi D("%s head %d cur %d tail %d (kring %d %d %d)", NM_IFPNAME(ifp), 1776262153Sluigi ring->head, ring->cur, ring->tail, 1777262153Sluigi kring->rhead, kring->rcur, kring->rtail); 1778262153Sluigi 1779262153Sluigi ring->head = kring->rhead; 1780262153Sluigi ring->cur = kring->rcur; 1781262153Sluigi ring->tail = kring->rtail; 1782262153Sluigi 1783262153Sluigi if (is_host_ring) { 1784262153Sluigi vpna = hostna; 1785262153Sluigi ring_nr = 0; 1786262153Sluigi } 1787262153Sluigi /* simulate a user wakeup on the rx ring */ 1788262153Sluigi /* fetch packets that have arrived. 1789262153Sluigi * XXX maybe do this in a loop ? 1790262153Sluigi */ 1791262153Sluigi error = kring->nm_sync(kring, 0); 1792262153Sluigi if (error) 1793262153Sluigi goto put_out; 1794262153Sluigi if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) { 1795262153Sluigi D("how strange, interrupt with no packets on %s", 1796262153Sluigi NM_IFPNAME(ifp)); 1797262153Sluigi goto put_out; 1798262153Sluigi } 1799262153Sluigi 1800262153Sluigi /* new packets are ring->cur to ring->tail, and the bkring 1801262153Sluigi * had hwcur == ring->cur. So advance ring->cur to ring->tail 1802262153Sluigi * to push all packets out. 1803262153Sluigi */ 1804262153Sluigi ring->head = ring->cur = ring->tail; 1805262153Sluigi 1806262153Sluigi /* also set tail to what the bwrap expects */ 1807262153Sluigi bkring = &vpna->up.tx_rings[ring_nr]; 1808262153Sluigi ring->tail = bkring->nr_hwtail; // rtail too ? 1809262153Sluigi 1810262153Sluigi /* pass packets to the switch */ 1811262153Sluigi nm_txsync_prologue(bkring); // XXX error checking ? 
1812262153Sluigi netmap_vp_txsync(vpna, ring_nr, flags); 1813262153Sluigi 1814262153Sluigi /* mark all buffers as released on this ring */ 1815262153Sluigi ring->head = ring->cur = kring->nr_hwtail; 1816262153Sluigi ring->tail = kring->rtail; 1817262153Sluigi /* another call to actually release the buffers */ 1818262153Sluigi if (!is_host_ring) { 1819262153Sluigi error = kring->nm_sync(kring, 0); 1820262153Sluigi } else { 1821262153Sluigi /* mark all packets as released, as in the 1822262153Sluigi * second part of netmap_rxsync_from_host() 1823262153Sluigi */ 1824262153Sluigi kring->nr_hwcur = kring->nr_hwtail; 1825262153Sluigi nm_rxsync_finalize(kring); 1826262153Sluigi } 1827262153Sluigi 1828262153Sluigiput_out: 1829262153Sluigi nm_kr_put(kring); 1830262153Sluigi return error; 1831262153Sluigi} 1832262153Sluigi 1833262153Sluigi 1834262153Sluigistatic int 1835262153Sluiginetmap_bwrap_register(struct netmap_adapter *na, int onoff) 1836262153Sluigi{ 1837262153Sluigi struct netmap_bwrap_adapter *bna = 1838262153Sluigi (struct netmap_bwrap_adapter *)na; 1839262153Sluigi struct netmap_adapter *hwna = bna->hwna; 1840262153Sluigi struct netmap_vp_adapter *hostna = &bna->host; 1841262153Sluigi int error; 1842262153Sluigi 1843262153Sluigi ND("%s %s", NM_IFPNAME(na->ifp), onoff ? "on" : "off"); 1844262153Sluigi 1845262153Sluigi if (onoff) { 1846262153Sluigi int i; 1847262153Sluigi 1848262153Sluigi hwna->na_lut = na->na_lut; 1849262153Sluigi hwna->na_lut_objtotal = na->na_lut_objtotal; 1850262153Sluigi 1851262153Sluigi if (hostna->na_bdg) { 1852262153Sluigi hostna->up.na_lut = na->na_lut; 1853262153Sluigi hostna->up.na_lut_objtotal = na->na_lut_objtotal; 1854262153Sluigi } 1855262153Sluigi 1856262153Sluigi /* cross-link the netmap rings 1857262153Sluigi * The original number of rings comes from hwna, 1858262153Sluigi * rx rings on one side equals tx rings on the other. 
1859262153Sluigi */ 1860262153Sluigi for (i = 0; i < na->num_rx_rings + 1; i++) { 1861262153Sluigi hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots; 1862262153Sluigi hwna->tx_rings[i].ring = na->rx_rings[i].ring; 1863262153Sluigi } 1864262153Sluigi for (i = 0; i < na->num_tx_rings + 1; i++) { 1865262153Sluigi hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots; 1866262153Sluigi hwna->rx_rings[i].ring = na->tx_rings[i].ring; 1867262153Sluigi } 1868262153Sluigi } 1869262153Sluigi 1870262153Sluigi if (hwna->ifp) { 1871262153Sluigi error = hwna->nm_register(hwna, onoff); 1872262153Sluigi if (error) 1873262153Sluigi return error; 1874262153Sluigi } 1875262153Sluigi 1876262153Sluigi bdg_netmap_reg(na, onoff); 1877262153Sluigi 1878262153Sluigi if (onoff) { 1879262153Sluigi bna->save_notify = hwna->nm_notify; 1880262153Sluigi hwna->nm_notify = netmap_bwrap_intr_notify; 1881262153Sluigi } else { 1882262153Sluigi hwna->nm_notify = bna->save_notify; 1883262153Sluigi hwna->na_lut = NULL; 1884262153Sluigi hwna->na_lut_objtotal = 0; 1885262153Sluigi } 1886262153Sluigi 1887262153Sluigi return 0; 1888262153Sluigi} 1889262153Sluigi 1890262153Sluigi 1891262153Sluigistatic int 1892262153Sluiginetmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, 1893262153Sluigi u_int *rxr, u_int *rxd) 1894262153Sluigi{ 1895262153Sluigi struct netmap_bwrap_adapter *bna = 1896262153Sluigi (struct netmap_bwrap_adapter *)na; 1897262153Sluigi struct netmap_adapter *hwna = bna->hwna; 1898262153Sluigi 1899262153Sluigi /* forward the request */ 1900262153Sluigi netmap_update_config(hwna); 1901262153Sluigi /* swap the results */ 1902262153Sluigi *txr = hwna->num_rx_rings; 1903262153Sluigi *txd = hwna->num_rx_desc; 1904262153Sluigi *rxr = hwna->num_tx_rings; 1905262153Sluigi *rxd = hwna->num_rx_desc; 1906262153Sluigi 1907262153Sluigi return 0; 1908262153Sluigi} 1909262153Sluigi 1910262153Sluigi 1911262153Sluigistatic int 
1912262153Sluiginetmap_bwrap_krings_create(struct netmap_adapter *na) 1913262153Sluigi{ 1914262153Sluigi struct netmap_bwrap_adapter *bna = 1915262153Sluigi (struct netmap_bwrap_adapter *)na; 1916262153Sluigi struct netmap_adapter *hwna = bna->hwna; 1917262153Sluigi struct netmap_adapter *hostna = &bna->host.up; 1918262153Sluigi int error; 1919262153Sluigi 1920262153Sluigi ND("%s", NM_IFPNAME(na->ifp)); 1921262153Sluigi 1922262153Sluigi error = netmap_vp_krings_create(na); 1923262153Sluigi if (error) 1924262153Sluigi return error; 1925262153Sluigi 1926262153Sluigi error = hwna->nm_krings_create(hwna); 1927262153Sluigi if (error) { 1928262153Sluigi netmap_vp_krings_delete(na); 1929262153Sluigi return error; 1930262153Sluigi } 1931262153Sluigi 1932262153Sluigi if (na->na_flags & NAF_HOST_RINGS) { 1933262153Sluigi hostna->tx_rings = na->tx_rings + na->num_tx_rings; 1934262153Sluigi hostna->rx_rings = na->rx_rings + na->num_rx_rings; 1935262153Sluigi } 1936262153Sluigi 1937262153Sluigi return 0; 1938262153Sluigi} 1939262153Sluigi 1940262153Sluigi 1941262153Sluigistatic void 1942262153Sluiginetmap_bwrap_krings_delete(struct netmap_adapter *na) 1943262153Sluigi{ 1944262153Sluigi struct netmap_bwrap_adapter *bna = 1945262153Sluigi (struct netmap_bwrap_adapter *)na; 1946262153Sluigi struct netmap_adapter *hwna = bna->hwna; 1947262153Sluigi 1948262153Sluigi ND("%s", NM_IFPNAME(na->ifp)); 1949262153Sluigi 1950262153Sluigi hwna->nm_krings_delete(hwna); 1951262153Sluigi netmap_vp_krings_delete(na); 1952262153Sluigi} 1953262153Sluigi 1954262153Sluigi 1955262153Sluigi/* notify method for the bridge-->hwna direction */ 1956262153Sluigistatic int 1957262153Sluiginetmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags) 1958262153Sluigi{ 1959262153Sluigi struct netmap_bwrap_adapter *bna = 1960262153Sluigi (struct netmap_bwrap_adapter *)na; 1961262153Sluigi struct netmap_adapter *hwna = bna->hwna; 1962262153Sluigi struct netmap_kring *kring, *hw_kring; 
1963262153Sluigi struct netmap_ring *ring; 1964262153Sluigi u_int lim; 1965262153Sluigi int error = 0; 1966262153Sluigi 1967262153Sluigi if (tx == NR_TX) 1968262153Sluigi return EINVAL; 1969262153Sluigi 1970262153Sluigi kring = &na->rx_rings[ring_n]; 1971262153Sluigi hw_kring = &hwna->tx_rings[ring_n]; 1972262153Sluigi ring = kring->ring; 1973262153Sluigi lim = kring->nkr_num_slots - 1; 1974262153Sluigi 1975262153Sluigi if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP)) 1976262153Sluigi return 0; 1977262153Sluigi mtx_lock(&kring->q_lock); 1978262153Sluigi /* first step: simulate a user wakeup on the rx ring */ 1979262153Sluigi netmap_vp_rxsync(na, ring_n, flags); 1980262153Sluigi ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 1981262153Sluigi NM_IFPNAME(na->ifp), ring_n, 1982262153Sluigi kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 1983262153Sluigi ring->head, ring->cur, ring->tail, 1984262153Sluigi hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail); 1985262153Sluigi /* second step: the simulated user consumes all new packets */ 1986262153Sluigi ring->head = ring->cur = ring->tail; 1987262153Sluigi 1988262153Sluigi /* third step: the new packets are sent on the tx ring 1989262153Sluigi * (which is actually the same ring) 1990262153Sluigi */ 1991262153Sluigi /* set tail to what the hw expects */ 1992262153Sluigi ring->tail = hw_kring->rtail; 1993262153Sluigi nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ? 
1994262153Sluigi error = hw_kring->nm_sync(hw_kring, flags); 1995262153Sluigi 1996262153Sluigi /* fourth step: now we are back the rx ring */ 1997262153Sluigi /* claim ownership on all hw owned bufs */ 1998262153Sluigi ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */ 1999262153Sluigi ring->tail = kring->rtail; /* restore saved value of tail, for safety */ 2000262153Sluigi 2001262153Sluigi /* fifth step: the user goes to sleep again, causing another rxsync */ 2002262153Sluigi netmap_vp_rxsync(na, ring_n, flags); 2003262153Sluigi ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 2004262153Sluigi NM_IFPNAME(na->ifp), ring_n, 2005262153Sluigi kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 2006262153Sluigi ring->head, ring->cur, ring->tail, 2007262153Sluigi hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); 2008262153Sluigi mtx_unlock(&kring->q_lock); 2009262153Sluigi return error; 2010262153Sluigi} 2011262153Sluigi 2012262153Sluigi 2013262153Sluigistatic int 2014262153Sluiginetmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags) 2015262153Sluigi{ 2016262153Sluigi struct netmap_bwrap_adapter *bna = na->na_private; 2017262153Sluigi struct netmap_adapter *port_na = &bna->up.up; 2018262153Sluigi if (tx == NR_TX || ring_n != 0) 2019262153Sluigi return EINVAL; 2020262153Sluigi return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags); 2021262153Sluigi} 2022262153Sluigi 2023262153Sluigi 2024262153Sluigi/* attach a bridge wrapper to the 'real' device */ 2025262153Sluigistatic int 2026262153Sluiginetmap_bwrap_attach(struct ifnet *fake, struct ifnet *real) 2027262153Sluigi{ 2028262153Sluigi struct netmap_bwrap_adapter *bna; 2029262153Sluigi struct netmap_adapter *na; 2030262153Sluigi struct netmap_adapter *hwna = NA(real); 2031262153Sluigi struct netmap_adapter *hostna; 2032262153Sluigi int error; 2033262153Sluigi 2034262153Sluigi 2035262153Sluigi bna = 
malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO); 2036262153Sluigi if (bna == NULL) 2037262153Sluigi return ENOMEM; 2038262153Sluigi 2039262153Sluigi na = &bna->up.up; 2040262153Sluigi na->ifp = fake; 2041262153Sluigi /* fill the ring data for the bwrap adapter with rx/tx meanings 2042262153Sluigi * swapped. The real cross-linking will be done during register, 2043262153Sluigi * when all the krings will have been created. 2044262153Sluigi */ 2045262153Sluigi na->num_rx_rings = hwna->num_tx_rings; 2046262153Sluigi na->num_tx_rings = hwna->num_rx_rings; 2047262153Sluigi na->num_tx_desc = hwna->num_rx_desc; 2048262153Sluigi na->num_rx_desc = hwna->num_tx_desc; 2049262153Sluigi na->nm_dtor = netmap_bwrap_dtor; 2050262153Sluigi na->nm_register = netmap_bwrap_register; 2051262153Sluigi // na->nm_txsync = netmap_bwrap_txsync; 2052262153Sluigi // na->nm_rxsync = netmap_bwrap_rxsync; 2053262153Sluigi na->nm_config = netmap_bwrap_config; 2054262153Sluigi na->nm_krings_create = netmap_bwrap_krings_create; 2055262153Sluigi na->nm_krings_delete = netmap_bwrap_krings_delete; 2056262153Sluigi na->nm_notify = netmap_bwrap_notify; 2057262153Sluigi na->nm_mem = hwna->nm_mem; 2058262153Sluigi na->na_private = na; /* prevent NIOCREGIF */ 2059262153Sluigi bna->up.retry = 1; /* XXX maybe this should depend on the hwna */ 2060262153Sluigi 2061262153Sluigi bna->hwna = hwna; 2062262153Sluigi netmap_adapter_get(hwna); 2063262153Sluigi hwna->na_private = bna; /* weak reference */ 2064262153Sluigi 2065262153Sluigi if (hwna->na_flags & NAF_HOST_RINGS) { 2066262153Sluigi na->na_flags |= NAF_HOST_RINGS; 2067262153Sluigi hostna = &bna->host.up; 2068262153Sluigi hostna->ifp = hwna->ifp; 2069262153Sluigi hostna->num_tx_rings = 1; 2070262153Sluigi hostna->num_tx_desc = hwna->num_rx_desc; 2071262153Sluigi hostna->num_rx_rings = 1; 2072262153Sluigi hostna->num_rx_desc = hwna->num_tx_desc; 2073262153Sluigi // hostna->nm_txsync = netmap_bwrap_host_txsync; 2074262153Sluigi // hostna->nm_rxsync = 
netmap_bwrap_host_rxsync; 2075262153Sluigi hostna->nm_notify = netmap_bwrap_host_notify; 2076262153Sluigi hostna->nm_mem = na->nm_mem; 2077262153Sluigi hostna->na_private = bna; 2078262153Sluigi } 2079262153Sluigi 2080262153Sluigi ND("%s<->%s txr %d txd %d rxr %d rxd %d", 2081262153Sluigi fake->if_xname, real->if_xname, 2082262153Sluigi na->num_tx_rings, na->num_tx_desc, 2083262153Sluigi na->num_rx_rings, na->num_rx_desc); 2084262153Sluigi 2085262153Sluigi error = netmap_attach_common(na); 2086262153Sluigi if (error) { 2087262153Sluigi netmap_adapter_put(hwna); 2088262153Sluigi free(bna, M_DEVBUF); 2089262153Sluigi return error; 2090262153Sluigi } 2091262153Sluigi return 0; 2092262153Sluigi} 2093262153Sluigi 2094262153Sluigi 2095262153Sluigivoid 2096262153Sluiginetmap_init_bridges(void) 2097262153Sluigi{ 2098262153Sluigi int i; 2099262153Sluigi bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */ 2100262153Sluigi for (i = 0; i < NM_BRIDGES; i++) 2101262153Sluigi BDG_RWINIT(&nm_bridges[i]); 2102262153Sluigi} 2103262153Sluigi#endif /* WITH_VALE */ 2104