netmap_vale.c revision 270063
1259412Sluigi/* 2260368Sluigi * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. 3259412Sluigi * 4259412Sluigi * Redistribution and use in source and binary forms, with or without 5259412Sluigi * modification, are permitted provided that the following conditions 6259412Sluigi * are met: 7259412Sluigi * 1. Redistributions of source code must retain the above copyright 8259412Sluigi * notice, this list of conditions and the following disclaimer. 9259412Sluigi * 2. Redistributions in binary form must reproduce the above copyright 10259412Sluigi * notice, this list of conditions and the following disclaimer in the 11259412Sluigi * documentation and/or other materials provided with the distribution. 12259412Sluigi * 13259412Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14259412Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15259412Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16259412Sluigi * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17259412Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18259412Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19259412Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20259412Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21259412Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22259412Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23259412Sluigi * SUCH DAMAGE. 24259412Sluigi */ 25259412Sluigi 26259412Sluigi 27259412Sluigi/* 28259412Sluigi * This module implements the VALE switch for netmap 29259412Sluigi 30259412Sluigi--- VALE SWITCH --- 31259412Sluigi 32259412SluigiNMG_LOCK() serializes all modifications to switches and ports. 
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring or deleting a new port, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch)

 */

/*
 * OS-specific code that is used only within this file.
54259412Sluigi * Other OS-specific code that must be accessed by drivers 55259412Sluigi * is present in netmap_kern.h 56259412Sluigi */ 57259412Sluigi 58259412Sluigi#if defined(__FreeBSD__) 59259412Sluigi#include <sys/cdefs.h> /* prerequisite */ 60259412Sluigi__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap_vale.c 270063 2014-08-16 15:00:01Z luigi $"); 61259412Sluigi 62259412Sluigi#include <sys/types.h> 63259412Sluigi#include <sys/errno.h> 64259412Sluigi#include <sys/param.h> /* defines used in kernel.h */ 65259412Sluigi#include <sys/kernel.h> /* types used in module initialization */ 66259412Sluigi#include <sys/conf.h> /* cdevsw struct, UID, GID */ 67259412Sluigi#include <sys/sockio.h> 68259412Sluigi#include <sys/socketvar.h> /* struct socket */ 69259412Sluigi#include <sys/malloc.h> 70259412Sluigi#include <sys/poll.h> 71259412Sluigi#include <sys/rwlock.h> 72259412Sluigi#include <sys/socket.h> /* sockaddrs */ 73259412Sluigi#include <sys/selinfo.h> 74259412Sluigi#include <sys/sysctl.h> 75259412Sluigi#include <net/if.h> 76259412Sluigi#include <net/if_var.h> 77259412Sluigi#include <net/bpf.h> /* BIOCIMMEDIATE */ 78259412Sluigi#include <machine/bus.h> /* bus_dmamap_* */ 79259412Sluigi#include <sys/endian.h> 80259412Sluigi#include <sys/refcount.h> 81259412Sluigi 82259412Sluigi 83259412Sluigi#define BDG_RWLOCK_T struct rwlock // struct rwlock 84259412Sluigi 85259412Sluigi#define BDG_RWINIT(b) \ 86259412Sluigi rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS) 87259412Sluigi#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock) 88259412Sluigi#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock) 89259412Sluigi#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock) 90259412Sluigi#define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock) 91259412Sluigi#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock) 92259412Sluigi#define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock) 93259412Sluigi 94259412Sluigi 95259412Sluigi#elif defined(linux) 96259412Sluigi 97259412Sluigi#include "bsd_glue.h" 98259412Sluigi 
99259412Sluigi#elif defined(__APPLE__) 100259412Sluigi 101259412Sluigi#warning OSX support is only partial 102259412Sluigi#include "osx_glue.h" 103259412Sluigi 104259412Sluigi#else 105259412Sluigi 106259412Sluigi#error Unsupported platform 107259412Sluigi 108259412Sluigi#endif /* unsupported */ 109259412Sluigi 110259412Sluigi/* 111259412Sluigi * common headers 112259412Sluigi */ 113259412Sluigi 114259412Sluigi#include <net/netmap.h> 115259412Sluigi#include <dev/netmap/netmap_kern.h> 116259412Sluigi#include <dev/netmap/netmap_mem2.h> 117259412Sluigi 118259412Sluigi#ifdef WITH_VALE 119259412Sluigi 120259412Sluigi/* 121259412Sluigi * system parameters (most of them in netmap_kern.h) 122259412Sluigi * NM_NAME prefix for switch port names, default "vale" 123259412Sluigi * NM_BDG_MAXPORTS number of ports 124259412Sluigi * NM_BRIDGES max number of switches in the system. 125259412Sluigi * XXX should become a sysctl or tunable 126259412Sluigi * 127259412Sluigi * Switch ports are named valeX:Y where X is the switch name and Y 128259412Sluigi * is the port. If Y matches a physical interface name, the port is 129259412Sluigi * connected to a physical device. 130259412Sluigi * 131259412Sluigi * Unlike physical interfaces, switch ports use their own memory region 132259412Sluigi * for rings and buffers. 133259412Sluigi * The virtual interfaces use per-queue lock instead of core lock. 134259412Sluigi * In the tx loop, we aggregate traffic in batches to make all operations 135259412Sluigi * faster. The batch size is bridge_batch. 136259412Sluigi */ 137259412Sluigi#define NM_BDG_MAXRINGS 16 /* XXX unclear how many. 
*/ 138259412Sluigi#define NM_BDG_MAXSLOTS 4096 /* XXX same as above */ 139259412Sluigi#define NM_BRIDGE_RINGSIZE 1024 /* in the device */ 140259412Sluigi#define NM_BDG_HASH 1024 /* forwarding table entries */ 141259412Sluigi#define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */ 142259412Sluigi#define NM_MULTISEG 64 /* max size of a chain of bufs */ 143259412Sluigi/* actual size of the tables */ 144259412Sluigi#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG) 145259412Sluigi/* NM_FT_NULL terminates a list of slots in the ft */ 146259412Sluigi#define NM_FT_NULL NM_BDG_BATCH_MAX 147259412Sluigi#define NM_BRIDGES 8 /* number of bridges */ 148259412Sluigi 149259412Sluigi 150259412Sluigi/* 151259412Sluigi * bridge_batch is set via sysctl to the max batch size to be 152259412Sluigi * used in the bridge. The actual value may be larger as the 153259412Sluigi * last packet in the block may overflow the size. 154259412Sluigi */ 155259412Sluigiint bridge_batch = NM_BDG_BATCH; /* bridge batch size */ 156259412SluigiSYSCTL_DECL(_dev_netmap); 157259412SluigiSYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , ""); 158259412Sluigi 159259412Sluigi 160270063Sluigistatic int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **); 161270063Sluigistatic int netmap_vp_reg(struct netmap_adapter *na, int onoff); 162259412Sluigistatic int netmap_bwrap_register(struct netmap_adapter *, int onoff); 163259412Sluigi 164259412Sluigi/* 165259412Sluigi * For each output interface, nm_bdg_q is used to construct a list. 166259412Sluigi * bq_len is the number of output buffers (we can have coalescing 167259412Sluigi * during the copy). 
168259412Sluigi */ 169259412Sluigistruct nm_bdg_q { 170259412Sluigi uint16_t bq_head; 171259412Sluigi uint16_t bq_tail; 172259412Sluigi uint32_t bq_len; /* number of buffers */ 173259412Sluigi}; 174259412Sluigi 175259412Sluigi/* XXX revise this */ 176259412Sluigistruct nm_hash_ent { 177259412Sluigi uint64_t mac; /* the top 2 bytes are the epoch */ 178259412Sluigi uint64_t ports; 179259412Sluigi}; 180259412Sluigi 181259412Sluigi/* 182259412Sluigi * nm_bridge is a descriptor for a VALE switch. 183259412Sluigi * Interfaces for a bridge are all in bdg_ports[]. 184259412Sluigi * The array has fixed size, an empty entry does not terminate 185259412Sluigi * the search, but lookups only occur on attach/detach so we 186259412Sluigi * don't mind if they are slow. 187259412Sluigi * 188259412Sluigi * The bridge is non blocking on the transmit ports: excess 189259412Sluigi * packets are dropped if there is no room on the output port. 190259412Sluigi * 191259412Sluigi * bdg_lock protects accesses to the bdg_ports array. 192259412Sluigi * This is a rw lock (or equivalent). 193259412Sluigi */ 194259412Sluigistruct nm_bridge { 195259412Sluigi /* XXX what is the proper alignment/layout ? */ 196259412Sluigi BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */ 197259412Sluigi int bdg_namelen; 198259412Sluigi uint32_t bdg_active_ports; /* 0 means free */ 199259412Sluigi char bdg_basename[IFNAMSIZ]; 200259412Sluigi 201259412Sluigi /* Indexes of active ports (up to active_ports) 202259412Sluigi * and all other remaining ports. 203259412Sluigi */ 204259412Sluigi uint8_t bdg_port_index[NM_BDG_MAXPORTS]; 205259412Sluigi 206259412Sluigi struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS]; 207259412Sluigi 208259412Sluigi 209259412Sluigi /* 210259412Sluigi * The function to decide the destination port. 211259412Sluigi * It returns either of an index of the destination port, 212259412Sluigi * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to 213259412Sluigi * forward this packet. 
ring_nr is the source ring index, and the 214259412Sluigi * function may overwrite this value to forward this packet to a 215259412Sluigi * different ring index. 216259412Sluigi * This function must be set by netmap_bdgctl(). 217259412Sluigi */ 218270063Sluigi struct netmap_bdg_ops bdg_ops; 219259412Sluigi 220259412Sluigi /* the forwarding table, MAC+ports. 221259412Sluigi * XXX should be changed to an argument to be passed to 222259412Sluigi * the lookup function, and allocated on attach 223259412Sluigi */ 224259412Sluigi struct nm_hash_ent ht[NM_BDG_HASH]; 225259412Sluigi}; 226259412Sluigi 227270063Sluigiconst char* 228270063Sluiginetmap_bdg_name(struct netmap_vp_adapter *vp) 229270063Sluigi{ 230270063Sluigi struct nm_bridge *b = vp->na_bdg; 231270063Sluigi if (b == NULL) 232270063Sluigi return NULL; 233270063Sluigi return b->bdg_basename; 234270063Sluigi} 235259412Sluigi 236270063Sluigi 237259412Sluigi/* 238259412Sluigi * XXX in principle nm_bridges could be created dynamically 239259412Sluigi * Right now we have a static array and deletions are protected 240259412Sluigi * by an exclusive lock. 241259412Sluigi */ 242259412Sluigistruct nm_bridge nm_bridges[NM_BRIDGES]; 243259412Sluigi 244259412Sluigi 245259412Sluigi/* 246259412Sluigi * this is a slightly optimized copy routine which rounds 247259412Sluigi * to multiple of 64 bytes and is often faster than dealing 248259412Sluigi * with other odd sizes. We assume there is enough room 249259412Sluigi * in the source and destination buffers. 250259412Sluigi * 251259412Sluigi * XXX only for multiples of 64 bytes, non overlapped. 
252259412Sluigi */ 253259412Sluigistatic inline void 254259412Sluigipkt_copy(void *_src, void *_dst, int l) 255259412Sluigi{ 256259412Sluigi uint64_t *src = _src; 257259412Sluigi uint64_t *dst = _dst; 258259412Sluigi if (unlikely(l >= 1024)) { 259259412Sluigi memcpy(dst, src, l); 260259412Sluigi return; 261259412Sluigi } 262259412Sluigi for (; likely(l > 0); l-=64) { 263259412Sluigi *dst++ = *src++; 264259412Sluigi *dst++ = *src++; 265259412Sluigi *dst++ = *src++; 266259412Sluigi *dst++ = *src++; 267259412Sluigi *dst++ = *src++; 268259412Sluigi *dst++ = *src++; 269259412Sluigi *dst++ = *src++; 270259412Sluigi *dst++ = *src++; 271259412Sluigi } 272259412Sluigi} 273259412Sluigi 274259412Sluigi 275259412Sluigi/* 276259412Sluigi * locate a bridge among the existing ones. 277259412Sluigi * MUST BE CALLED WITH NMG_LOCK() 278259412Sluigi * 279259412Sluigi * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME. 280259412Sluigi * We assume that this is called with a name of at least NM_NAME chars. 281259412Sluigi */ 282259412Sluigistatic struct nm_bridge * 283259412Sluiginm_find_bridge(const char *name, int create) 284259412Sluigi{ 285259412Sluigi int i, l, namelen; 286259412Sluigi struct nm_bridge *b = NULL; 287259412Sluigi 288259412Sluigi NMG_LOCK_ASSERT(); 289259412Sluigi 290259412Sluigi namelen = strlen(NM_NAME); /* base length */ 291259412Sluigi l = name ? strlen(name) : 0; /* actual length */ 292259412Sluigi if (l < namelen) { 293259412Sluigi D("invalid bridge name %s", name ? 
name : NULL); 294259412Sluigi return NULL; 295259412Sluigi } 296259412Sluigi for (i = namelen + 1; i < l; i++) { 297259412Sluigi if (name[i] == ':') { 298259412Sluigi namelen = i; 299259412Sluigi break; 300259412Sluigi } 301259412Sluigi } 302259412Sluigi if (namelen >= IFNAMSIZ) 303259412Sluigi namelen = IFNAMSIZ; 304259412Sluigi ND("--- prefix is '%.*s' ---", namelen, name); 305259412Sluigi 306259412Sluigi /* lookup the name, remember empty slot if there is one */ 307259412Sluigi for (i = 0; i < NM_BRIDGES; i++) { 308259412Sluigi struct nm_bridge *x = nm_bridges + i; 309259412Sluigi 310259412Sluigi if (x->bdg_active_ports == 0) { 311259412Sluigi if (create && b == NULL) 312259412Sluigi b = x; /* record empty slot */ 313259412Sluigi } else if (x->bdg_namelen != namelen) { 314259412Sluigi continue; 315259412Sluigi } else if (strncmp(name, x->bdg_basename, namelen) == 0) { 316259412Sluigi ND("found '%.*s' at %d", namelen, name, i); 317259412Sluigi b = x; 318259412Sluigi break; 319259412Sluigi } 320259412Sluigi } 321259412Sluigi if (i == NM_BRIDGES && b) { /* name not found, can create entry */ 322259412Sluigi /* initialize the bridge */ 323259412Sluigi strncpy(b->bdg_basename, name, namelen); 324259412Sluigi ND("create new bridge %s with ports %d", b->bdg_basename, 325259412Sluigi b->bdg_active_ports); 326259412Sluigi b->bdg_namelen = namelen; 327259412Sluigi b->bdg_active_ports = 0; 328259412Sluigi for (i = 0; i < NM_BDG_MAXPORTS; i++) 329259412Sluigi b->bdg_port_index[i] = i; 330259412Sluigi /* set the default function */ 331270063Sluigi b->bdg_ops.lookup = netmap_bdg_learning; 332259412Sluigi /* reset the MAC address table */ 333259412Sluigi bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); 334259412Sluigi } 335259412Sluigi return b; 336259412Sluigi} 337259412Sluigi 338259412Sluigi 339259412Sluigi/* 340259412Sluigi * Free the forwarding tables for rings attached to switch ports. 
341259412Sluigi */ 342259412Sluigistatic void 343259412Sluiginm_free_bdgfwd(struct netmap_adapter *na) 344259412Sluigi{ 345259412Sluigi int nrings, i; 346259412Sluigi struct netmap_kring *kring; 347259412Sluigi 348259412Sluigi NMG_LOCK_ASSERT(); 349260368Sluigi nrings = na->num_tx_rings; 350260368Sluigi kring = na->tx_rings; 351259412Sluigi for (i = 0; i < nrings; i++) { 352259412Sluigi if (kring[i].nkr_ft) { 353259412Sluigi free(kring[i].nkr_ft, M_DEVBUF); 354259412Sluigi kring[i].nkr_ft = NULL; /* protect from freeing twice */ 355259412Sluigi } 356259412Sluigi } 357259412Sluigi} 358259412Sluigi 359259412Sluigi 360259412Sluigi/* 361259412Sluigi * Allocate the forwarding tables for the rings attached to the bridge ports. 362259412Sluigi */ 363259412Sluigistatic int 364259412Sluiginm_alloc_bdgfwd(struct netmap_adapter *na) 365259412Sluigi{ 366259412Sluigi int nrings, l, i, num_dstq; 367259412Sluigi struct netmap_kring *kring; 368259412Sluigi 369259412Sluigi NMG_LOCK_ASSERT(); 370259412Sluigi /* all port:rings + broadcast */ 371259412Sluigi num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1; 372259412Sluigi l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX; 373259412Sluigi l += sizeof(struct nm_bdg_q) * num_dstq; 374259412Sluigi l += sizeof(uint16_t) * NM_BDG_BATCH_MAX; 375259412Sluigi 376261909Sluigi nrings = netmap_real_tx_rings(na); 377259412Sluigi kring = na->tx_rings; 378259412Sluigi for (i = 0; i < nrings; i++) { 379259412Sluigi struct nm_bdg_fwd *ft; 380259412Sluigi struct nm_bdg_q *dstq; 381259412Sluigi int j; 382259412Sluigi 383259412Sluigi ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO); 384259412Sluigi if (!ft) { 385259412Sluigi nm_free_bdgfwd(na); 386259412Sluigi return ENOMEM; 387259412Sluigi } 388259412Sluigi dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); 389259412Sluigi for (j = 0; j < num_dstq; j++) { 390259412Sluigi dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL; 391259412Sluigi dstq[j].bq_len = 0; 392259412Sluigi } 393259412Sluigi kring[i].nkr_ft = ft; 
394259412Sluigi } 395259412Sluigi return 0; 396259412Sluigi} 397259412Sluigi 398259412Sluigi 399270063Sluigi/* remove from bridge b the ports in slots hw and sw 400270063Sluigi * (sw can be -1 if not needed) 401270063Sluigi */ 402259412Sluigistatic void 403259412Sluiginetmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) 404259412Sluigi{ 405259412Sluigi int s_hw = hw, s_sw = sw; 406259412Sluigi int i, lim =b->bdg_active_ports; 407259412Sluigi uint8_t tmp[NM_BDG_MAXPORTS]; 408259412Sluigi 409259412Sluigi /* 410259412Sluigi New algorithm: 411259412Sluigi make a copy of bdg_port_index; 412259412Sluigi lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port 413259412Sluigi in the array of bdg_port_index, replacing them with 414259412Sluigi entries from the bottom of the array; 415259412Sluigi decrement bdg_active_ports; 416259412Sluigi acquire BDG_WLOCK() and copy back the array. 417259412Sluigi */ 418259412Sluigi 419261909Sluigi if (netmap_verbose) 420261909Sluigi D("detach %d and %d (lim %d)", hw, sw, lim); 421259412Sluigi /* make a copy of the list of active ports, update it, 422259412Sluigi * and then copy back within BDG_WLOCK(). 
423259412Sluigi */ 424259412Sluigi memcpy(tmp, b->bdg_port_index, sizeof(tmp)); 425259412Sluigi for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) { 426259412Sluigi if (hw >= 0 && tmp[i] == hw) { 427259412Sluigi ND("detach hw %d at %d", hw, i); 428259412Sluigi lim--; /* point to last active port */ 429259412Sluigi tmp[i] = tmp[lim]; /* swap with i */ 430259412Sluigi tmp[lim] = hw; /* now this is inactive */ 431259412Sluigi hw = -1; 432259412Sluigi } else if (sw >= 0 && tmp[i] == sw) { 433259412Sluigi ND("detach sw %d at %d", sw, i); 434259412Sluigi lim--; 435259412Sluigi tmp[i] = tmp[lim]; 436259412Sluigi tmp[lim] = sw; 437259412Sluigi sw = -1; 438259412Sluigi } else { 439259412Sluigi i++; 440259412Sluigi } 441259412Sluigi } 442259412Sluigi if (hw >= 0 || sw >= 0) { 443259412Sluigi D("XXX delete failed hw %d sw %d, should panic...", hw, sw); 444259412Sluigi } 445259412Sluigi 446259412Sluigi BDG_WLOCK(b); 447270063Sluigi if (b->bdg_ops.dtor) 448270063Sluigi b->bdg_ops.dtor(b->bdg_ports[s_hw]); 449259412Sluigi b->bdg_ports[s_hw] = NULL; 450259412Sluigi if (s_sw >= 0) { 451259412Sluigi b->bdg_ports[s_sw] = NULL; 452259412Sluigi } 453259412Sluigi memcpy(b->bdg_port_index, tmp, sizeof(tmp)); 454259412Sluigi b->bdg_active_ports = lim; 455259412Sluigi BDG_WUNLOCK(b); 456259412Sluigi 457259412Sluigi ND("now %d active ports", lim); 458259412Sluigi if (lim == 0) { 459259412Sluigi ND("marking bridge %s as free", b->bdg_basename); 460270063Sluigi bzero(&b->bdg_ops, sizeof(b->bdg_ops)); 461259412Sluigi } 462259412Sluigi} 463259412Sluigi 464270063Sluigi/* nm_bdg_ctl callback for VALE ports */ 465270063Sluigistatic int 466270063Sluiginetmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) 467270063Sluigi{ 468270063Sluigi struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; 469270063Sluigi struct nm_bridge *b = vpna->na_bdg; 470260368Sluigi 471270063Sluigi if (attach) 472270063Sluigi return 0; /* nothing to do */ 473270063Sluigi if (b) { 474270063Sluigi 
netmap_set_all_rings(na, 0 /* disable */); 475270063Sluigi netmap_bdg_detach_common(b, vpna->bdg_port, -1); 476270063Sluigi vpna->na_bdg = NULL; 477270063Sluigi netmap_set_all_rings(na, 1 /* enable */); 478270063Sluigi } 479270063Sluigi /* I have took reference just for attach */ 480270063Sluigi netmap_adapter_put(na); 481270063Sluigi return 0; 482270063Sluigi} 483270063Sluigi 484270063Sluigi/* nm_dtor callback for ephemeral VALE ports */ 485259412Sluigistatic void 486270063Sluiginetmap_vp_dtor(struct netmap_adapter *na) 487259412Sluigi{ 488259412Sluigi struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na; 489259412Sluigi struct nm_bridge *b = vpna->na_bdg; 490259412Sluigi 491270063Sluigi ND("%s has %d references", na->name, na->na_refcount); 492259412Sluigi 493259412Sluigi if (b) { 494259412Sluigi netmap_bdg_detach_common(b, vpna->bdg_port, -1); 495259412Sluigi } 496270063Sluigi} 497259412Sluigi 498270063Sluigi/* nm_dtor callback for persistent VALE ports */ 499270063Sluigistatic void 500270063Sluiginetmap_persist_vp_dtor(struct netmap_adapter *na) 501270063Sluigi{ 502270063Sluigi struct ifnet *ifp = na->ifp; 503270063Sluigi 504270063Sluigi netmap_vp_dtor(na); 505259412Sluigi na->ifp = NULL; 506270063Sluigi nm_vi_detach(ifp); 507259412Sluigi} 508259412Sluigi 509270063Sluigi/* remove a persistent VALE port from the system */ 510270063Sluigistatic int 511270063Sluiginm_vi_destroy(const char *name) 512270063Sluigi{ 513270063Sluigi struct ifnet *ifp; 514270063Sluigi int error; 515260368Sluigi 516270063Sluigi ifp = ifunit_ref(name); 517270063Sluigi if (!ifp) 518270063Sluigi return ENXIO; 519270063Sluigi NMG_LOCK(); 520270063Sluigi /* make sure this is actually a VALE port */ 521270063Sluigi if (!NETMAP_CAPABLE(ifp) || NA(ifp)->nm_register != netmap_vp_reg) { 522270063Sluigi error = EINVAL; 523270063Sluigi goto err; 524270063Sluigi } 525270063Sluigi 526270063Sluigi if (NA(ifp)->na_refcount > 1) { 527270063Sluigi error = EBUSY; 528270063Sluigi goto err; 
529270063Sluigi } 530270063Sluigi NMG_UNLOCK(); 531270063Sluigi 532270063Sluigi D("destroying a persistent vale interface %s", ifp->if_xname); 533270063Sluigi /* Linux requires all the references are released 534270063Sluigi * before unregister 535270063Sluigi */ 536270063Sluigi if_rele(ifp); 537270063Sluigi netmap_detach(ifp); 538270063Sluigi return 0; 539270063Sluigi 540270063Sluigierr: 541270063Sluigi NMG_UNLOCK(); 542270063Sluigi if_rele(ifp); 543270063Sluigi return error; 544270063Sluigi} 545270063Sluigi 546270063Sluigi/* 547270063Sluigi * Create a virtual interface registered to the system. 548270063Sluigi * The interface will be attached to a bridge later. 549270063Sluigi */ 550270063Sluigistatic int 551270063Sluiginm_vi_create(struct nmreq *nmr) 552270063Sluigi{ 553270063Sluigi struct ifnet *ifp; 554270063Sluigi struct netmap_vp_adapter *vpna; 555270063Sluigi int error; 556270063Sluigi 557270063Sluigi /* don't include VALE prefix */ 558270063Sluigi if (!strncmp(nmr->nr_name, NM_NAME, strlen(NM_NAME))) 559270063Sluigi return EINVAL; 560270063Sluigi ifp = ifunit_ref(nmr->nr_name); 561270063Sluigi if (ifp) { /* already exist, cannot create new one */ 562270063Sluigi if_rele(ifp); 563270063Sluigi return EEXIST; 564270063Sluigi } 565270063Sluigi error = nm_vi_persist(nmr->nr_name, &ifp); 566270063Sluigi if (error) 567270063Sluigi return error; 568270063Sluigi 569270063Sluigi NMG_LOCK(); 570270063Sluigi /* netmap_vp_create creates a struct netmap_vp_adapter */ 571270063Sluigi error = netmap_vp_create(nmr, ifp, &vpna); 572270063Sluigi if (error) { 573270063Sluigi D("error %d", error); 574270063Sluigi nm_vi_detach(ifp); 575270063Sluigi return error; 576270063Sluigi } 577270063Sluigi /* persist-specific routines */ 578270063Sluigi vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl; 579270063Sluigi vpna->up.nm_dtor = netmap_persist_vp_dtor; 580270063Sluigi netmap_adapter_get(&vpna->up); 581270063Sluigi NMG_UNLOCK(); 582270063Sluigi D("created %s", ifp->if_xname); 
583270063Sluigi return 0; 584270063Sluigi} 585270063Sluigi 586260368Sluigi/* Try to get a reference to a netmap adapter attached to a VALE switch. 587260368Sluigi * If the adapter is found (or is created), this function returns 0, a 588260368Sluigi * non NULL pointer is returned into *na, and the caller holds a 589260368Sluigi * reference to the adapter. 590260368Sluigi * If an adapter is not found, then no reference is grabbed and the 591260368Sluigi * function returns an error code, or 0 if there is just a VALE prefix 592260368Sluigi * mismatch. Therefore the caller holds a reference when 593260368Sluigi * (*na != NULL && return == 0). 594260368Sluigi */ 595259412Sluigiint 596259412Sluiginetmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create) 597259412Sluigi{ 598270063Sluigi char *nr_name = nmr->nr_name; 599270063Sluigi const char *ifname; 600259412Sluigi struct ifnet *ifp; 601259412Sluigi int error = 0; 602270063Sluigi struct netmap_vp_adapter *vpna, *hostna = NULL; 603259412Sluigi struct nm_bridge *b; 604259412Sluigi int i, j, cand = -1, cand2 = -1; 605259412Sluigi int needed; 606259412Sluigi 607259412Sluigi *na = NULL; /* default return value */ 608259412Sluigi 609259412Sluigi /* first try to see if this is a bridge port. */ 610259412Sluigi NMG_LOCK_ASSERT(); 611270063Sluigi if (strncmp(nr_name, NM_NAME, sizeof(NM_NAME) - 1)) { 612259412Sluigi return 0; /* no error, but no VALE prefix */ 613259412Sluigi } 614259412Sluigi 615270063Sluigi b = nm_find_bridge(nr_name, create); 616259412Sluigi if (b == NULL) { 617270063Sluigi D("no bridges available for '%s'", nr_name); 618260700Sluigi return (create ? ENOMEM : ENXIO); 619259412Sluigi } 620270063Sluigi if (strlen(nr_name) < b->bdg_namelen) /* impossible */ 621270063Sluigi panic("x"); 622259412Sluigi 623259412Sluigi /* Now we are sure that name starts with the bridge's name, 624259412Sluigi * lookup the port in the bridge. We need to scan the entire 625259412Sluigi * list. 
It is not important to hold a WLOCK on the bridge 626259412Sluigi * during the search because NMG_LOCK already guarantees 627259412Sluigi * that there are no other possible writers. 628259412Sluigi */ 629259412Sluigi 630259412Sluigi /* lookup in the local list of ports */ 631259412Sluigi for (j = 0; j < b->bdg_active_ports; j++) { 632259412Sluigi i = b->bdg_port_index[j]; 633259412Sluigi vpna = b->bdg_ports[i]; 634259412Sluigi // KASSERT(na != NULL); 635270063Sluigi D("checking %s", vpna->up.name); 636270063Sluigi if (!strcmp(vpna->up.name, nr_name)) { 637259412Sluigi netmap_adapter_get(&vpna->up); 638270063Sluigi ND("found existing if %s refs %d", nr_name) 639270063Sluigi *na = &vpna->up; 640259412Sluigi return 0; 641259412Sluigi } 642259412Sluigi } 643259412Sluigi /* not found, should we create it? */ 644259412Sluigi if (!create) 645259412Sluigi return ENXIO; 646259412Sluigi /* yes we should, see if we have space to attach entries */ 647259412Sluigi needed = 2; /* in some cases we only need 1 */ 648259412Sluigi if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) { 649259412Sluigi D("bridge full %d, cannot create new port", b->bdg_active_ports); 650260700Sluigi return ENOMEM; 651259412Sluigi } 652259412Sluigi /* record the next two ports available, but do not allocate yet */ 653259412Sluigi cand = b->bdg_port_index[b->bdg_active_ports]; 654259412Sluigi cand2 = b->bdg_port_index[b->bdg_active_ports + 1]; 655259412Sluigi ND("+++ bridge %s port %s used %d avail %d %d", 656270063Sluigi b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2); 657259412Sluigi 658259412Sluigi /* 659259412Sluigi * try see if there is a matching NIC with this name 660259412Sluigi * (after the bridge's name) 661259412Sluigi */ 662270063Sluigi ifname = nr_name + b->bdg_namelen + 1; 663270063Sluigi ifp = ifunit_ref(ifname); 664270063Sluigi if (!ifp) { 665270063Sluigi /* Create an ephemeral virtual port 666270063Sluigi * This block contains all the ephemeral-specific logics 667270063Sluigi 
*/ 668259412Sluigi if (nmr->nr_cmd) { 669259412Sluigi /* nr_cmd must be 0 for a virtual port */ 670259412Sluigi return EINVAL; 671259412Sluigi } 672259412Sluigi 673259412Sluigi /* bdg_netmap_attach creates a struct netmap_adapter */ 674270063Sluigi error = netmap_vp_create(nmr, NULL, &vpna); 675259412Sluigi if (error) { 676259412Sluigi D("error %d", error); 677259412Sluigi free(ifp, M_DEVBUF); 678259412Sluigi return error; 679259412Sluigi } 680270063Sluigi /* shortcut - we can skip get_hw_na(), 681270063Sluigi * ownership check and nm_bdg_attach() 682270063Sluigi */ 683270063Sluigi } else { 684270063Sluigi struct netmap_adapter *hw; 685259412Sluigi 686270063Sluigi error = netmap_get_hw_na(ifp, &hw); 687270063Sluigi if (error || hw == NULL) 688259412Sluigi goto out; 689259412Sluigi 690270063Sluigi /* host adapter might not be created */ 691270063Sluigi error = hw->nm_bdg_attach(nr_name, hw); 692270063Sluigi if (error) 693259412Sluigi goto out; 694270063Sluigi vpna = hw->na_vp; 695270063Sluigi hostna = hw->na_hostvp; 696270063Sluigi if_rele(ifp); 697259412Sluigi if (nmr->nr_arg1 != NETMAP_BDG_HOST) 698270063Sluigi hostna = NULL; 699259412Sluigi } 700259412Sluigi 701259412Sluigi BDG_WLOCK(b); 702259412Sluigi vpna->bdg_port = cand; 703259412Sluigi ND("NIC %p to bridge port %d", vpna, cand); 704259412Sluigi /* bind the port to the bridge (virtual ports are not active) */ 705259412Sluigi b->bdg_ports[cand] = vpna; 706259412Sluigi vpna->na_bdg = b; 707259412Sluigi b->bdg_active_ports++; 708270063Sluigi if (hostna != NULL) { 709259412Sluigi /* also bind the host stack to the bridge */ 710259412Sluigi b->bdg_ports[cand2] = hostna; 711259412Sluigi hostna->bdg_port = cand2; 712259412Sluigi hostna->na_bdg = b; 713259412Sluigi b->bdg_active_ports++; 714259412Sluigi ND("host %p to bridge port %d", hostna, cand2); 715259412Sluigi } 716270063Sluigi ND("if %s refs %d", ifname, vpna->up.na_refcount); 717259412Sluigi BDG_WUNLOCK(b); 718270063Sluigi *na = &vpna->up; 719270063Sluigi 
netmap_adapter_get(*na); 720259412Sluigi return 0; 721259412Sluigi 722259412Sluigiout: 723259412Sluigi if_rele(ifp); 724259412Sluigi 725259412Sluigi return error; 726259412Sluigi} 727259412Sluigi 728259412Sluigi 729270063Sluigi/* Process NETMAP_BDG_ATTACH */ 730259412Sluigistatic int 731270063Sluiginm_bdg_ctl_attach(struct nmreq *nmr) 732259412Sluigi{ 733259412Sluigi struct netmap_adapter *na; 734259412Sluigi int error; 735259412Sluigi 736259412Sluigi NMG_LOCK(); 737260700Sluigi 738260368Sluigi error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */); 739270063Sluigi if (error) /* no device */ 740259412Sluigi goto unlock_exit; 741260700Sluigi 742260368Sluigi if (na == NULL) { /* VALE prefix missing */ 743259412Sluigi error = EINVAL; 744260368Sluigi goto unlock_exit; 745259412Sluigi } 746259412Sluigi 747270063Sluigi if (NETMAP_OWNED_BY_ANY(na)) { 748259412Sluigi error = EBUSY; 749259412Sluigi goto unref_exit; 750259412Sluigi } 751259412Sluigi 752270063Sluigi if (na->nm_bdg_ctl) { 753270063Sluigi /* nop for VALE ports. 
The bwrap needs to put the hwna 754270063Sluigi * in netmap mode (see netmap_bwrap_bdg_ctl) 755270063Sluigi */ 756270063Sluigi error = na->nm_bdg_ctl(na, nmr, 1); 757270063Sluigi if (error) 758270063Sluigi goto unref_exit; 759270063Sluigi ND("registered %s to netmap-mode", na->name); 760259412Sluigi } 761259412Sluigi NMG_UNLOCK(); 762259412Sluigi return 0; 763259412Sluigi 764259412Sluigiunref_exit: 765259412Sluigi netmap_adapter_put(na); 766259412Sluigiunlock_exit: 767259412Sluigi NMG_UNLOCK(); 768259412Sluigi return error; 769259412Sluigi} 770259412Sluigi 771260368Sluigi 772270063Sluigi/* process NETMAP_BDG_DETACH */ 773259412Sluigistatic int 774270063Sluiginm_bdg_ctl_detach(struct nmreq *nmr) 775259412Sluigi{ 776259412Sluigi struct netmap_adapter *na; 777259412Sluigi int error; 778259412Sluigi 779259412Sluigi NMG_LOCK(); 780260368Sluigi error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */); 781259412Sluigi if (error) { /* no device, or another bridge or user owns the device */ 782259412Sluigi goto unlock_exit; 783259412Sluigi } 784260700Sluigi 785260368Sluigi if (na == NULL) { /* VALE prefix missing */ 786259412Sluigi error = EINVAL; 787260368Sluigi goto unlock_exit; 788259412Sluigi } 789260368Sluigi 790270063Sluigi if (na->nm_bdg_ctl) { 791270063Sluigi /* remove the port from bridge. The bwrap 792270063Sluigi * also needs to put the hwna in normal mode 793270063Sluigi */ 794270063Sluigi error = na->nm_bdg_ctl(na, nmr, 0); 795259412Sluigi } 796259412Sluigi 797259412Sluigi netmap_adapter_put(na); 798259412Sluigiunlock_exit: 799259412Sluigi NMG_UNLOCK(); 800259412Sluigi return error; 801259412Sluigi 802259412Sluigi} 803259412Sluigi 804259412Sluigi 805270063Sluigi/* Called by either user's context (netmap_ioctl()) 806270063Sluigi * or external kernel modules (e.g., Openvswitch). 807270063Sluigi * Operation is indicated in nmr->nr_cmd. 
808270063Sluigi * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge 809270063Sluigi * requires bdg_ops argument; the other commands ignore this argument. 810270063Sluigi * 811259412Sluigi * Called without NMG_LOCK. 812259412Sluigi */ 813259412Sluigiint 814270063Sluiginetmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops) 815259412Sluigi{ 816259412Sluigi struct nm_bridge *b; 817259412Sluigi struct netmap_adapter *na; 818259412Sluigi struct netmap_vp_adapter *vpna; 819259412Sluigi char *name = nmr->nr_name; 820259412Sluigi int cmd = nmr->nr_cmd, namelen = strlen(name); 821259412Sluigi int error = 0, i, j; 822259412Sluigi 823259412Sluigi switch (cmd) { 824270063Sluigi case NETMAP_BDG_NEWIF: 825270063Sluigi error = nm_vi_create(nmr); 826270063Sluigi break; 827270063Sluigi 828270063Sluigi case NETMAP_BDG_DELIF: 829270063Sluigi error = nm_vi_destroy(nmr->nr_name); 830270063Sluigi break; 831270063Sluigi 832259412Sluigi case NETMAP_BDG_ATTACH: 833270063Sluigi error = nm_bdg_ctl_attach(nmr); 834259412Sluigi break; 835259412Sluigi 836259412Sluigi case NETMAP_BDG_DETACH: 837270063Sluigi error = nm_bdg_ctl_detach(nmr); 838259412Sluigi break; 839259412Sluigi 840259412Sluigi case NETMAP_BDG_LIST: 841259412Sluigi /* this is used to enumerate bridges and ports */ 842259412Sluigi if (namelen) { /* look up indexes of bridge and port */ 843259412Sluigi if (strncmp(name, NM_NAME, strlen(NM_NAME))) { 844259412Sluigi error = EINVAL; 845259412Sluigi break; 846259412Sluigi } 847259412Sluigi NMG_LOCK(); 848259412Sluigi b = nm_find_bridge(name, 0 /* don't create */); 849259412Sluigi if (!b) { 850259412Sluigi error = ENOENT; 851259412Sluigi NMG_UNLOCK(); 852259412Sluigi break; 853259412Sluigi } 854259412Sluigi 855270063Sluigi name = name + b->bdg_namelen + 1; 856259412Sluigi error = ENOENT; 857259412Sluigi for (j = 0; j < b->bdg_active_ports; j++) { 858259412Sluigi i = b->bdg_port_index[j]; 859259412Sluigi vpna = b->bdg_ports[i]; 860259412Sluigi if (vpna == 
NULL) { 861259412Sluigi D("---AAAAAAAAARGH-------"); 862259412Sluigi continue; 863259412Sluigi } 864259412Sluigi /* the former and the latter identify a 865259412Sluigi * virtual port and a NIC, respectively 866259412Sluigi */ 867270063Sluigi if (!strcmp(vpna->up.name, name)) { 868259412Sluigi /* bridge index */ 869259412Sluigi nmr->nr_arg1 = b - nm_bridges; 870259412Sluigi nmr->nr_arg2 = i; /* port index */ 871259412Sluigi error = 0; 872259412Sluigi break; 873259412Sluigi } 874259412Sluigi } 875259412Sluigi NMG_UNLOCK(); 876259412Sluigi } else { 877259412Sluigi /* return the first non-empty entry starting from 878259412Sluigi * bridge nr_arg1 and port nr_arg2. 879259412Sluigi * 880259412Sluigi * Users can detect the end of the same bridge by 881259412Sluigi * seeing the new and old value of nr_arg1, and can 882259412Sluigi * detect the end of all the bridge by error != 0 883259412Sluigi */ 884259412Sluigi i = nmr->nr_arg1; 885259412Sluigi j = nmr->nr_arg2; 886259412Sluigi 887259412Sluigi NMG_LOCK(); 888259412Sluigi for (error = ENOENT; i < NM_BRIDGES; i++) { 889259412Sluigi b = nm_bridges + i; 890259412Sluigi if (j >= b->bdg_active_ports) { 891259412Sluigi j = 0; /* following bridges scan from 0 */ 892259412Sluigi continue; 893259412Sluigi } 894259412Sluigi nmr->nr_arg1 = i; 895259412Sluigi nmr->nr_arg2 = j; 896259412Sluigi j = b->bdg_port_index[j]; 897259412Sluigi vpna = b->bdg_ports[j]; 898270063Sluigi strncpy(name, vpna->up.name, (size_t)IFNAMSIZ); 899259412Sluigi error = 0; 900259412Sluigi break; 901259412Sluigi } 902259412Sluigi NMG_UNLOCK(); 903259412Sluigi } 904259412Sluigi break; 905259412Sluigi 906270063Sluigi case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */ 907270063Sluigi /* register callbacks to the given bridge. 908259412Sluigi * nmr->nr_name may be just bridge's name (including ':' 909259412Sluigi * if it is not just NM_NAME). 
910259412Sluigi */ 911270063Sluigi if (!bdg_ops) { 912259412Sluigi error = EINVAL; 913259412Sluigi break; 914259412Sluigi } 915259412Sluigi NMG_LOCK(); 916259412Sluigi b = nm_find_bridge(name, 0 /* don't create */); 917259412Sluigi if (!b) { 918259412Sluigi error = EINVAL; 919259412Sluigi } else { 920270063Sluigi b->bdg_ops = *bdg_ops; 921259412Sluigi } 922259412Sluigi NMG_UNLOCK(); 923259412Sluigi break; 924259412Sluigi 925261909Sluigi case NETMAP_BDG_VNET_HDR: 926261909Sluigi /* Valid lengths for the virtio-net header are 0 (no header), 927261909Sluigi 10 and 12. */ 928261909Sluigi if (nmr->nr_arg1 != 0 && 929261909Sluigi nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) && 930261909Sluigi nmr->nr_arg1 != 12) { 931261909Sluigi error = EINVAL; 932261909Sluigi break; 933261909Sluigi } 934259412Sluigi NMG_LOCK(); 935259412Sluigi error = netmap_get_bdg_na(nmr, &na, 0); 936260368Sluigi if (na && !error) { 937259412Sluigi vpna = (struct netmap_vp_adapter *)na; 938261909Sluigi vpna->virt_hdr_len = nmr->nr_arg1; 939261909Sluigi if (vpna->virt_hdr_len) 940270063Sluigi vpna->mfs = NETMAP_BUF_SIZE(na); 941261909Sluigi D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna); 942260368Sluigi netmap_adapter_put(na); 943259412Sluigi } 944259412Sluigi NMG_UNLOCK(); 945259412Sluigi break; 946259412Sluigi 947259412Sluigi default: 948259412Sluigi D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd); 949259412Sluigi error = EINVAL; 950259412Sluigi break; 951259412Sluigi } 952259412Sluigi return error; 953259412Sluigi} 954259412Sluigi 955270063Sluigiint 956270063Sluiginetmap_bdg_config(struct nmreq *nmr) 957270063Sluigi{ 958270063Sluigi struct nm_bridge *b; 959270063Sluigi int error = EINVAL; 960270063Sluigi 961270063Sluigi NMG_LOCK(); 962270063Sluigi b = nm_find_bridge(nmr->nr_name, 0); 963270063Sluigi if (!b) { 964270063Sluigi NMG_UNLOCK(); 965270063Sluigi return error; 966270063Sluigi } 967270063Sluigi NMG_UNLOCK(); 968270063Sluigi /* Don't call config() with NMG_LOCK() held */ 969270063Sluigi 
BDG_RLOCK(b); 970270063Sluigi if (b->bdg_ops.config != NULL) 971270063Sluigi error = b->bdg_ops.config((struct nm_ifreq *)nmr); 972270063Sluigi BDG_RUNLOCK(b); 973270063Sluigi return error; 974270063Sluigi} 975270063Sluigi 976270063Sluigi 977270063Sluigi/* nm_krings_create callback for VALE ports. 978270063Sluigi * Calls the standard netmap_krings_create, then adds leases on rx 979270063Sluigi * rings and bdgfwd on tx rings. 980270063Sluigi */ 981259412Sluigistatic int 982259412Sluiginetmap_vp_krings_create(struct netmap_adapter *na) 983259412Sluigi{ 984261909Sluigi u_int tailroom; 985259412Sluigi int error, i; 986259412Sluigi uint32_t *leases; 987261909Sluigi u_int nrx = netmap_real_rx_rings(na); 988259412Sluigi 989259412Sluigi /* 990259412Sluigi * Leases are attached to RX rings on vale ports 991259412Sluigi */ 992259412Sluigi tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx; 993259412Sluigi 994261909Sluigi error = netmap_krings_create(na, tailroom); 995259412Sluigi if (error) 996259412Sluigi return error; 997259412Sluigi 998259412Sluigi leases = na->tailroom; 999259412Sluigi 1000259412Sluigi for (i = 0; i < nrx; i++) { /* Receive rings */ 1001259412Sluigi na->rx_rings[i].nkr_leases = leases; 1002259412Sluigi leases += na->num_rx_desc; 1003259412Sluigi } 1004259412Sluigi 1005259412Sluigi error = nm_alloc_bdgfwd(na); 1006259412Sluigi if (error) { 1007259412Sluigi netmap_krings_delete(na); 1008259412Sluigi return error; 1009259412Sluigi } 1010259412Sluigi 1011259412Sluigi return 0; 1012259412Sluigi} 1013259412Sluigi 1014260368Sluigi 1015270063Sluigi/* nm_krings_delete callback for VALE ports. 
*/ 1016259412Sluigistatic void 1017259412Sluiginetmap_vp_krings_delete(struct netmap_adapter *na) 1018259412Sluigi{ 1019259412Sluigi nm_free_bdgfwd(na); 1020259412Sluigi netmap_krings_delete(na); 1021259412Sluigi} 1022259412Sluigi 1023259412Sluigi 1024259412Sluigistatic int 1025259412Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, 1026259412Sluigi struct netmap_vp_adapter *na, u_int ring_nr); 1027259412Sluigi 1028259412Sluigi 1029259412Sluigi/* 1030270063Sluigi * main dispatch routine for the bridge. 1031259412Sluigi * Grab packets from a kring, move them into the ft structure 1032259412Sluigi * associated to the tx (input) port. Max one instance per port, 1033259412Sluigi * filtered on input (ioctl, poll or XXX). 1034259412Sluigi * Returns the next position in the ring. 1035259412Sluigi */ 1036259412Sluigistatic int 1037270063Sluiginm_bdg_preflush(struct netmap_kring *kring, u_int end) 1038259412Sluigi{ 1039270063Sluigi struct netmap_vp_adapter *na = 1040270063Sluigi (struct netmap_vp_adapter*)kring->na; 1041259412Sluigi struct netmap_ring *ring = kring->ring; 1042259412Sluigi struct nm_bdg_fwd *ft; 1043270063Sluigi u_int ring_nr = kring->ring_id; 1044259412Sluigi u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1; 1045259412Sluigi u_int ft_i = 0; /* start from 0 */ 1046259412Sluigi u_int frags = 1; /* how many frags ? */ 1047259412Sluigi struct nm_bridge *b = na->na_bdg; 1048259412Sluigi 1049259412Sluigi /* To protect against modifications to the bridge we acquire a 1050259412Sluigi * shared lock, waiting if we can sleep (if the source port is 1051259412Sluigi * attached to a user process) or with a trylock otherwise (NICs). 1052259412Sluigi */ 1053259412Sluigi ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j); 1054259412Sluigi if (na->up.na_flags & NAF_BDG_MAYSLEEP) 1055259412Sluigi BDG_RLOCK(b); 1056259412Sluigi else if (!BDG_RTRYLOCK(b)) 1057259412Sluigi return 0; 1058259412Sluigi ND(5, "rlock acquired for %d packets", ((j > end ? 
lim+1 : 0) + end) - j); 1059259412Sluigi ft = kring->nkr_ft; 1060259412Sluigi 1061259412Sluigi for (; likely(j != end); j = nm_next(j, lim)) { 1062259412Sluigi struct netmap_slot *slot = &ring->slot[j]; 1063259412Sluigi char *buf; 1064259412Sluigi 1065259412Sluigi ft[ft_i].ft_len = slot->len; 1066259412Sluigi ft[ft_i].ft_flags = slot->flags; 1067259412Sluigi 1068259412Sluigi ND("flags is 0x%x", slot->flags); 1069259412Sluigi /* this slot goes into a list so initialize the link field */ 1070259412Sluigi ft[ft_i].ft_next = NM_FT_NULL; 1071259412Sluigi buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ? 1072270063Sluigi (void *)(uintptr_t)slot->ptr : NMB(&na->up, slot); 1073267151Sluigi if (unlikely(buf == NULL)) { 1074267151Sluigi RD(5, "NULL %s buffer pointer from %s slot %d len %d", 1075267151Sluigi (slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT", 1076267151Sluigi kring->name, j, ft[ft_i].ft_len); 1077270063Sluigi buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up); 1078267151Sluigi ft[ft_i].ft_len = 0; 1079267151Sluigi ft[ft_i].ft_flags = 0; 1080267151Sluigi } 1081259487Sluigi __builtin_prefetch(buf); 1082259412Sluigi ++ft_i; 1083259412Sluigi if (slot->flags & NS_MOREFRAG) { 1084259412Sluigi frags++; 1085259412Sluigi continue; 1086259412Sluigi } 1087259412Sluigi if (unlikely(netmap_verbose && frags > 1)) 1088259412Sluigi RD(5, "%d frags at %d", frags, ft_i - frags); 1089259412Sluigi ft[ft_i - frags].ft_frags = frags; 1090259412Sluigi frags = 1; 1091259412Sluigi if (unlikely((int)ft_i >= bridge_batch)) 1092259412Sluigi ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); 1093259412Sluigi } 1094259412Sluigi if (frags > 1) { 1095259412Sluigi D("truncate incomplete fragment at %d (%d frags)", ft_i, frags); 1096259412Sluigi // ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG 1097259412Sluigi ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG; 1098259412Sluigi ft[ft_i - frags].ft_frags = frags - 1; 1099259412Sluigi } 1100259412Sluigi if (ft_i) 1101259412Sluigi ft_i = nm_bdg_flush(ft, ft_i, na, 
ring_nr); 1102259412Sluigi BDG_RUNLOCK(b); 1103259412Sluigi return j; 1104259412Sluigi} 1105259412Sluigi 1106259412Sluigi 1107259412Sluigi/* ----- FreeBSD if_bridge hash function ------- */ 1108259412Sluigi 1109259412Sluigi/* 1110259412Sluigi * The following hash function is adapted from "Hash Functions" by Bob Jenkins 1111259412Sluigi * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). 1112259412Sluigi * 1113259412Sluigi * http://www.burtleburtle.net/bob/hash/spooky.html 1114259412Sluigi */ 1115259412Sluigi#define mix(a, b, c) \ 1116259412Sluigido { \ 1117259412Sluigi a -= b; a -= c; a ^= (c >> 13); \ 1118259412Sluigi b -= c; b -= a; b ^= (a << 8); \ 1119259412Sluigi c -= a; c -= b; c ^= (b >> 13); \ 1120259412Sluigi a -= b; a -= c; a ^= (c >> 12); \ 1121259412Sluigi b -= c; b -= a; b ^= (a << 16); \ 1122259412Sluigi c -= a; c -= b; c ^= (b >> 5); \ 1123259412Sluigi a -= b; a -= c; a ^= (c >> 3); \ 1124259412Sluigi b -= c; b -= a; b ^= (a << 10); \ 1125259412Sluigi c -= a; c -= b; c ^= (b >> 15); \ 1126259412Sluigi} while (/*CONSTCOND*/0) 1127259412Sluigi 1128260368Sluigi 1129259412Sluigistatic __inline uint32_t 1130259412Sluiginm_bridge_rthash(const uint8_t *addr) 1131259412Sluigi{ 1132259412Sluigi uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key 1133259412Sluigi 1134259412Sluigi b += addr[5] << 8; 1135259412Sluigi b += addr[4]; 1136259412Sluigi a += addr[3] << 24; 1137259412Sluigi a += addr[2] << 16; 1138259412Sluigi a += addr[1] << 8; 1139259412Sluigi a += addr[0]; 1140259412Sluigi 1141259412Sluigi mix(a, b, c); 1142259412Sluigi#define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1) 1143259412Sluigi return (c & BRIDGE_RTHASH_MASK); 1144259412Sluigi} 1145259412Sluigi 1146259412Sluigi#undef mix 1147259412Sluigi 1148259412Sluigi 1149270063Sluigi/* nm_register callback for VALE ports */ 1150259412Sluigistatic int 1151270063Sluiginetmap_vp_reg(struct netmap_adapter *na, int onoff) 1152259412Sluigi{ 1153259412Sluigi struct netmap_vp_adapter *vpna = 
1154259412Sluigi (struct netmap_vp_adapter*)na; 1155259412Sluigi 1156270063Sluigi /* persistent ports may be put in netmap mode 1157270063Sluigi * before being attached to a bridge 1158259412Sluigi */ 1159270063Sluigi if (vpna->na_bdg) 1160270063Sluigi BDG_WLOCK(vpna->na_bdg); 1161259412Sluigi if (onoff) { 1162270063Sluigi na->na_flags |= NAF_NETMAP_ON; 1163270063Sluigi /* XXX on FreeBSD, persistent VALE ports should also 1164270063Sluigi * toggle IFCAP_NETMAP in na->ifp (2014-03-16) 1165270063Sluigi */ 1166259412Sluigi } else { 1167270063Sluigi na->na_flags &= ~NAF_NETMAP_ON; 1168259412Sluigi } 1169270063Sluigi if (vpna->na_bdg) 1170270063Sluigi BDG_WUNLOCK(vpna->na_bdg); 1171259412Sluigi return 0; 1172259412Sluigi} 1173259412Sluigi 1174259412Sluigi 1175259412Sluigi/* 1176259412Sluigi * Lookup function for a learning bridge. 1177259412Sluigi * Update the hash table with the source address, 1178259412Sluigi * and then returns the destination port index, and the 1179259412Sluigi * ring in *dst_ring (at the moment, always use ring 0) 1180259412Sluigi */ 1181259412Sluigiu_int 1182270063Sluiginetmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, 1183270063Sluigi const struct netmap_vp_adapter *na) 1184259412Sluigi{ 1185270063Sluigi uint8_t *buf = ft->ft_buf; 1186270063Sluigi u_int buf_len = ft->ft_len; 1187259412Sluigi struct nm_hash_ent *ht = na->na_bdg->ht; 1188259412Sluigi uint32_t sh, dh; 1189259412Sluigi u_int dst, mysrc = na->bdg_port; 1190259412Sluigi uint64_t smac, dmac; 1191259412Sluigi 1192270063Sluigi /* safety check, unfortunately we have many cases */ 1193270063Sluigi if (buf_len >= 14 + na->virt_hdr_len) { 1194270063Sluigi /* virthdr + mac_hdr in the same slot */ 1195270063Sluigi buf += na->virt_hdr_len; 1196270063Sluigi buf_len -= na->virt_hdr_len; 1197270063Sluigi } else if (buf_len == na->virt_hdr_len && ft->ft_flags & NS_MOREFRAG) { 1198270063Sluigi /* only header in first fragment */ 1199270063Sluigi ft++; 1200270063Sluigi buf = ft->ft_buf; 
1201270063Sluigi buf_len = ft->ft_len; 1202270063Sluigi } else { 1203270063Sluigi RD(5, "invalid buf format, length %d", buf_len); 1204259412Sluigi return NM_BDG_NOPORT; 1205259412Sluigi } 1206259412Sluigi dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff; 1207259412Sluigi smac = le64toh(*(uint64_t *)(buf + 4)); 1208259412Sluigi smac >>= 16; 1209259412Sluigi 1210259412Sluigi /* 1211259412Sluigi * The hash is somewhat expensive, there might be some 1212259412Sluigi * worthwhile optimizations here. 1213259412Sluigi */ 1214259412Sluigi if ((buf[6] & 1) == 0) { /* valid src */ 1215259412Sluigi uint8_t *s = buf+6; 1216259412Sluigi sh = nm_bridge_rthash(s); // XXX hash of source 1217259412Sluigi /* update source port forwarding entry */ 1218259412Sluigi ht[sh].mac = smac; /* XXX expire ? */ 1219259412Sluigi ht[sh].ports = mysrc; 1220259412Sluigi if (netmap_verbose) 1221259412Sluigi D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d", 1222259412Sluigi s[0], s[1], s[2], s[3], s[4], s[5], mysrc); 1223259412Sluigi } 1224259412Sluigi dst = NM_BDG_BROADCAST; 1225259412Sluigi if ((buf[0] & 1) == 0) { /* unicast */ 1226259412Sluigi dh = nm_bridge_rthash(buf); // XXX hash of dst 1227259412Sluigi if (ht[dh].mac == dmac) { /* found dst */ 1228259412Sluigi dst = ht[dh].ports; 1229259412Sluigi } 1230259412Sluigi /* XXX otherwise return NM_BDG_UNKNOWN ? */ 1231259412Sluigi } 1232259412Sluigi *dst_ring = 0; 1233259412Sluigi return dst; 1234259412Sluigi} 1235259412Sluigi 1236259412Sluigi 1237259412Sluigi/* 1238260368Sluigi * Available space in the ring. 
Only used in VALE code 1239260368Sluigi * and only with is_rx = 1 1240260368Sluigi */ 1241260368Sluigistatic inline uint32_t 1242260368Sluiginm_kr_space(struct netmap_kring *k, int is_rx) 1243260368Sluigi{ 1244260368Sluigi int space; 1245260368Sluigi 1246260368Sluigi if (is_rx) { 1247260368Sluigi int busy = k->nkr_hwlease - k->nr_hwcur; 1248260368Sluigi if (busy < 0) 1249260368Sluigi busy += k->nkr_num_slots; 1250260368Sluigi space = k->nkr_num_slots - 1 - busy; 1251260368Sluigi } else { 1252260368Sluigi /* XXX never used in this branch */ 1253260368Sluigi space = k->nr_hwtail - k->nkr_hwlease; 1254260368Sluigi if (space < 0) 1255260368Sluigi space += k->nkr_num_slots; 1256260368Sluigi } 1257260368Sluigi#if 0 1258260368Sluigi // sanity check 1259260368Sluigi if (k->nkr_hwlease >= k->nkr_num_slots || 1260260368Sluigi k->nr_hwcur >= k->nkr_num_slots || 1261260368Sluigi k->nr_tail >= k->nkr_num_slots || 1262260368Sluigi busy < 0 || 1263260368Sluigi busy >= k->nkr_num_slots) { 1264260368Sluigi D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 1265260368Sluigi k->nkr_lease_idx, k->nkr_num_slots); 1266260368Sluigi } 1267260368Sluigi#endif 1268260368Sluigi return space; 1269260368Sluigi} 1270260368Sluigi 1271260368Sluigi 1272260368Sluigi 1273260368Sluigi 1274260368Sluigi/* make a lease on the kring for N positions. 
return the 1275260368Sluigi * lease index 1276260368Sluigi * XXX only used in VALE code and with is_rx = 1 1277260368Sluigi */ 1278260368Sluigistatic inline uint32_t 1279260368Sluiginm_kr_lease(struct netmap_kring *k, u_int n, int is_rx) 1280260368Sluigi{ 1281260368Sluigi uint32_t lim = k->nkr_num_slots - 1; 1282260368Sluigi uint32_t lease_idx = k->nkr_lease_idx; 1283260368Sluigi 1284260368Sluigi k->nkr_leases[lease_idx] = NR_NOSLOT; 1285260368Sluigi k->nkr_lease_idx = nm_next(lease_idx, lim); 1286260368Sluigi 1287260368Sluigi if (n > nm_kr_space(k, is_rx)) { 1288260368Sluigi D("invalid request for %d slots", n); 1289260368Sluigi panic("x"); 1290260368Sluigi } 1291260368Sluigi /* XXX verify that there are n slots */ 1292260368Sluigi k->nkr_hwlease += n; 1293260368Sluigi if (k->nkr_hwlease > lim) 1294260368Sluigi k->nkr_hwlease -= lim + 1; 1295260368Sluigi 1296260368Sluigi if (k->nkr_hwlease >= k->nkr_num_slots || 1297260368Sluigi k->nr_hwcur >= k->nkr_num_slots || 1298260368Sluigi k->nr_hwtail >= k->nkr_num_slots || 1299260368Sluigi k->nkr_lease_idx >= k->nkr_num_slots) { 1300260368Sluigi D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d", 1301270063Sluigi k->na->name, 1302260368Sluigi k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 1303260368Sluigi k->nkr_lease_idx, k->nkr_num_slots); 1304260368Sluigi } 1305260368Sluigi return lease_idx; 1306260368Sluigi} 1307260368Sluigi 1308260368Sluigi/* 1309270063Sluigi * 1310259412Sluigi * This flush routine supports only unicast and broadcast but a large 1311259412Sluigi * number of ports, and lets us replace the learn and dispatch functions. 
*
 * ft points to n forwarding entries collected from ring ring_nr of the
 * source port na.  A first pass asks the bridge lookup callback for the
 * destination of each packet and queues it per destination; a second
 * pass copies the queued packets into the rx ring of each destination.
 * Always returns 0.
 */
int
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
		u_int ring_nr)
{
	struct nm_bdg_q *dst_ents, *brddst;
	uint16_t num_dsts = 0, *dsts;
	struct nm_bridge *b = na->na_bdg;
	u_int i, j, me = na->bdg_port;

	/*
	 * The work area (pointed by ft) is followed by an array of
	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
	 * queues per port plus one for the broadcast traffic.
	 * Then we have an array of destination indexes.
	 */
	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);

	/* first pass: find a destination for each packet in the batch */
	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
		uint16_t dst_port, d_i;
		struct nm_bdg_q *d;

		ND("slot %d frags %d", i, ft[i].ft_frags);
		/* Drop the packet if the virtio-net header is not into the first
		   fragment nor at the very beginning of the second. */
		if (unlikely(na->virt_hdr_len > ft[i].ft_len))
			continue;
		dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
		if (netmap_verbose > 255)
			RD(5, "slot %d port %d -> %d", i, me, dst_port);
		if (dst_port == NM_BDG_NOPORT)
			continue; /* this packet is identified to be dropped */
		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
			continue;
		else if (dst_port == NM_BDG_BROADCAST)
			dst_ring = 0; /* broadcasts always go to ring 0 */
		else if (unlikely(dst_port == me ||
		    !b->bdg_ports[dst_port]))
			continue;	/* never send back to the source or to an empty port */

		/* get a position in the scratch pad */
		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
		d = dst_ents + d_i;

		/* append the first fragment to the list */
		if (d->bq_head == NM_FT_NULL) { /* new destination */
			d->bq_head = d->bq_tail = i;
			/* remember this position to be scanned later */
			if (dst_port != NM_BDG_BROADCAST)
				dsts[num_dsts++] = d_i;
		} else {
			ft[d->bq_tail].ft_next = i;
			d->bq_tail = i;
		}
		d->bq_len += ft[i].ft_frags;
	}

	/*
	 * Broadcast traffic goes to ring 0 on all destinations.
	 * So we need to add these rings to the list of ports to scan.
	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
	 * expensive. We should keep a compact list of active destinations
	 * so we could shorten this loop.
	 */
	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
	if (brddst->bq_head != NM_FT_NULL) {
		for (j = 0; likely(j < b->bdg_active_ports); j++) {
			uint16_t d_i;
			i = b->bdg_port_index[j];
			if (unlikely(i == me))
				continue;
			d_i = i * NM_BDG_MAXRINGS;
			/* ring 0 of this port is added only if the first pass
			 * did not queue unicast traffic for it already
			 */
			if (dst_ents[d_i].bq_head == NM_FT_NULL)
				dsts[num_dsts++] = d_i;
		}
	}

	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
	/* second pass: scan destinations */
	for (i = 0; i < num_dsts; i++) {
		struct netmap_vp_adapter *dst_na;
		struct netmap_kring *kring;
		struct netmap_ring *ring;
		u_int dst_nr, lim, j, d_i, next, brd_next;
		u_int needed, howmany;
		int retry = netmap_txsync_retry;
		struct nm_bdg_q *d;
		uint32_t my_start = 0, lease_idx = 0;
		int nrings;
		int virt_hdr_mismatch = 0;

		d_i = dsts[i];
		ND("second pass %d port %d", i, d_i);
		d = dst_ents + d_i;
		// XXX fix the division
		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
		/* protect from the lookup function returning an inactive
		 * destination port
		 */
		if (unlikely(dst_na == NULL))
			goto cleanup;
		if (dst_na->up.na_flags & NAF_SW_ONLY)
			goto cleanup;
		/*
		 * The interface may be in !netmap mode in two cases:
		 * - when na is attached but not activated yet;
		 * - when na is being deactivated but is still attached.
		 */
		if (unlikely(!nm_netmap_on(&dst_na->up))) {
			ND("not in netmap mode!");
			goto cleanup;
		}

		/* there is at least one either unicast or broadcast packet */
		brd_next = brddst->bq_head;
		next = d->bq_head;
		/* we need to reserve this many slots. If fewer are
		 * available, some packets will be dropped.
		 * Packets may have multiple fragments, so there is a
		 * chance that we may not use all of the slots
		 * we have claimed, so we will need to handle the leftover
		 * ones when we regain the lock.
		 */
		needed = d->bq_len + brddst->bq_len;

		if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
			RD(3, "virt_hdr_mismatch, src %d dst %d", na->virt_hdr_len, dst_na->virt_hdr_len);
			/* There is a virtio-net header/offloadings mismatch between
			 * source and destination. The slower mismatch datapath will
			 * be used to cope with all the mismatches.
			 */
			virt_hdr_mismatch = 1;
			if (dst_na->mfs < na->mfs) {
				/* We may need to do segmentation offloadings, and so
				 * we may need a number of destination slots greater
				 * than the number of input slots ('needed').
				 * We look for the smallest integer 'x' which satisfies:
				 *	needed * na->mfs + x * H <= x * dst_na->mfs
				 * where 'H' is the length of the longest header that may
				 * be replicated in the segmentation process (e.g. for
				 * TCPv4 we must account for ethernet header, IP header
				 * and TCPv4 header).
				 */
				needed = (needed * na->mfs) /
						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
			}
		}

		ND(5, "pass 2 dst %d is %x %s",
		    i, d_i, is_vp ? "virtual" : "nic/host");
		/* the low bits of d_i select the ring, folded onto the
		 * rings the destination actually has
		 */
		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
		nrings = dst_na->up.num_rx_rings;
		if (dst_nr >= nrings)
			dst_nr = dst_nr % nrings;
		kring = &dst_na->up.rx_rings[dst_nr];
		ring = kring->ring;
		lim = kring->nkr_num_slots - 1;

retry:

		if (dst_na->retry && retry) {
			/* try to get some free slot from the previous run */
			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
			/* actually useful only for bwraps, since there
			 * the notify will trigger a txsync on the hwna. VALE ports
			 * have dst_na->retry == 0
			 */
		}
		/* reserve the buffers in the queue and an entry
		 * to report completion, and drop lock.
		 * XXX this might become a helper function.
		 */
		mtx_lock(&kring->q_lock);
		if (kring->nkr_stopped) {
			mtx_unlock(&kring->q_lock);
			goto cleanup;
		}
		my_start = j = kring->nkr_hwlease;
		howmany = nm_kr_space(kring, 1);
		if (needed < howmany)
			howmany = needed;
		lease_idx = nm_kr_lease(kring, howmany, 1);
		mtx_unlock(&kring->q_lock);

		/* only retry if we need more than available slots */
		if (retry && needed <= howmany)
			retry = 0;

		/* copy to the destination queue */
		while (howmany > 0) {
			struct netmap_slot *slot;
			struct nm_bdg_fwd *ft_p, *ft_end;
			u_int cnt;

			/* find the queue from which we pick next packet.
			 * NM_FT_NULL is always higher than valid indexes
			 * so we never dereference it if the other list
			 * has packets (and if both are empty we never
			 * get here).
			 */
			if (next < brd_next) {
				ft_p = ft + next;
				next = ft_p->ft_next;
			} else { /* insert broadcast */
				ft_p = ft + brd_next;
				brd_next = ft_p->ft_next;
			}
			cnt = ft_p->ft_frags; // cnt > 0
			if (unlikely(cnt > howmany))
				break; /* no more space */
			if (netmap_verbose && cnt > 1)
				RD(5, "rx %d frags to %d", cnt, j);
			ft_end = ft_p + cnt;
			if (unlikely(virt_hdr_mismatch)) {
				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
			} else {
				howmany -= cnt;
				do {
					char *dst, *src = ft_p->ft_buf;
					size_t copy_len = ft_p->ft_len, dst_len = copy_len;

					slot = &ring->slot[j];
					dst = NMB(&dst_na->up, slot);

					ND("send [%d] %d(%d) bytes at %s:%d",
							i, (int)copy_len, (int)dst_len,
							NM_IFPNAME(dst_ifp), j);
					/* round to a multiple of 64 */
					copy_len = (copy_len + 63) & ~63;

					if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
						     copy_len > NETMAP_BUF_SIZE(&na->up))) {
						RD(5, "invalid len %d, down to 64", (int)copy_len);
						copy_len = dst_len = 64; // XXX
					}
					if (ft_p->ft_flags & NS_INDIRECT) {
						if (copyin(src, dst, copy_len)) {
							// invalid user pointer, pretend len is 0
							dst_len = 0;
						}
					} else {
						//memcpy(dst, src, copy_len);
						pkt_copy(src, dst, (int)copy_len);
					}
					slot->len = dst_len;
					/* fragment count goes in bits 8 and up of the flags */
					slot->flags = (cnt << 8)| NS_MOREFRAG;
					j = nm_next(j, lim);
					needed--;
					ft_p++;
				} while (ft_p != ft_end);
				slot->flags = (cnt << 8); /* clear flag on last entry */
			}
			/* are we done ? */
			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
				break;
		}
		{
			/* current position */
			uint32_t *p = kring->nkr_leases; /* shorthand */
			uint32_t update_pos;
			int still_locked = 1;

			mtx_lock(&kring->q_lock);
			if (unlikely(howmany > 0)) {
				/* not used all bufs. If i am the last one
				 * i can recover the slots, otherwise must
				 * fill them with 0 to mark empty packets.
				 */
				ND("leftover %d bufs", howmany);
				if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
					/* yes i am the last one */
					ND("roll back nkr_hwlease to %d", j);
					kring->nkr_hwlease = j;
				} else {
					while (howmany-- > 0) {
						ring->slot[j].len = 0;
						ring->slot[j].flags = 0;
						j = nm_next(j, lim);
					}
				}
			}
			p[lease_idx] = j; /* report I am done */

			update_pos = kring->nr_hwtail;

			if (my_start == update_pos) {
				/* all slots before my_start have been reported,
				 * so scan subsequent leases to see if other ranges
				 * have been completed, and do a selwakeup or txsync.
				 */
				while (lease_idx != kring->nkr_lease_idx &&
					p[lease_idx] != NR_NOSLOT) {
					j = p[lease_idx];
					p[lease_idx] = NR_NOSLOT;
					lease_idx = nm_next(lease_idx, lim);
				}
				/* j is the new 'write' position. j != my_start
				 * means there are new buffers to report
				 */
				if (likely(j != my_start)) {
					kring->nr_hwtail = j;
					still_locked = 0;
					mtx_unlock(&kring->q_lock);
					dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
					/* this is netmap_notify for VALE ports and
					 * netmap_bwrap_notify for bwrap. The latter will
					 * trigger a txsync on the underlying hwna
					 */
					if (dst_na->retry && retry--) {
						/* XXX this is going to call nm_notify again.
						 * Only useful for bwrap in virtual machines
						 */
						goto retry;
					}
				}
			}
			if (still_locked)
				mtx_unlock(&kring->q_lock);
		}
cleanup:
		/* empty this destination's queue so the scratch pad can be reused */
		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
		d->bq_len = 0;
	}
	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
	brddst->bq_len = 0;
	return 0;
}

/* nm_txsync callback for VALE ports.
 * Hands the slots between nr_hwcur and rcur to nm_bdg_preflush() and
 * then updates nr_hwcur/nr_hwtail from the position it returns.
 * Always returns 0.
 */
static int
netmap_vp_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_vp_adapter *na =
		(struct netmap_vp_adapter *)kring->na;
	u_int done;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const cur = kring->rcur;

	if (bridge_batch <= 0) { /* testing only */
		done = cur; // used all
		goto done;
	}
	if (!na->na_bdg) {
		/* port not attached to a bridge: mark everything as used */
		done = cur;
		goto done;
	}
	if (bridge_batch > NM_BDG_BATCH)
		bridge_batch = NM_BDG_BATCH;

	done = nm_bdg_preflush(kring, cur);
done:
	if (done != cur)
		D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail);
	/*
	 * packets between 'done' and 'cur' are left unsent.
	 */
	kring->nr_hwcur = done;
	kring->nr_hwtail = nm_prev(done, lim);
	nm_txsync_finalize(kring);
	if (netmap_verbose)
		D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
	return 0;
}


/* rxsync code used by VALE ports nm_rxsync callback and also
 * internally by the bwrap
 */
static int
netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, lim = kring->nkr_num_slots - 1;
	u_int head = nm_rxsync_prologue(kring);
	int n;

	if (head > lim) {
		D("ouch dangerous reset!!!");
		n = netmap_ring_reinit(kring);
		goto done;
	}

	/* First part, import newly received packets. */
	/* actually nothing to do here, they are already in the kring */

	/* Second part, skip past packets that userspace has released.
*/ 1703260368Sluigi nm_i = kring->nr_hwcur; 1704260368Sluigi if (nm_i != head) { 1705260368Sluigi /* consistency check, but nothing really important here */ 1706260368Sluigi for (n = 0; likely(nm_i != head); n++) { 1707260368Sluigi struct netmap_slot *slot = &ring->slot[nm_i]; 1708270063Sluigi void *addr = NMB(na, slot); 1709259412Sluigi 1710270063Sluigi if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */ 1711259412Sluigi D("bad buffer index %d, ignore ?", 1712259412Sluigi slot->buf_idx); 1713259412Sluigi } 1714259412Sluigi slot->flags &= ~NS_BUF_CHANGED; 1715260368Sluigi nm_i = nm_next(nm_i, lim); 1716259412Sluigi } 1717260368Sluigi kring->nr_hwcur = head; 1718259412Sluigi } 1719260368Sluigi 1720259412Sluigi /* tell userspace that there are new packets */ 1721260368Sluigi nm_rxsync_finalize(kring); 1722259412Sluigi n = 0; 1723259412Sluigidone: 1724260368Sluigi return n; 1725260368Sluigi} 1726260368Sluigi 1727260368Sluigi/* 1728270063Sluigi * nm_rxsync callback for VALE ports 1729260368Sluigi * user process reading from a VALE switch. 1730260368Sluigi * Already protected against concurrent calls from userspace, 1731260368Sluigi * but we must acquire the queue's lock to protect against 1732260368Sluigi * writers on the same queue. 1733260368Sluigi */ 1734260368Sluigistatic int 1735270063Sluiginetmap_vp_rxsync(struct netmap_kring *kring, int flags) 1736260368Sluigi{ 1737260368Sluigi int n; 1738260368Sluigi 1739260368Sluigi mtx_lock(&kring->q_lock); 1740270063Sluigi n = netmap_vp_rxsync_locked(kring, flags); 1741259412Sluigi mtx_unlock(&kring->q_lock); 1742259412Sluigi return n; 1743259412Sluigi} 1744259412Sluigi 1745260368Sluigi 1746270063Sluigi/* nm_bdg_attach callback for VALE ports 1747270063Sluigi * The na_vp port is this same netmap_adapter. There is no host port. 
1748270063Sluigi */ 1749259412Sluigistatic int 1750270063Sluiginetmap_vp_bdg_attach(const char *name, struct netmap_adapter *na) 1751259412Sluigi{ 1752270063Sluigi struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; 1753270063Sluigi 1754270063Sluigi if (vpna->na_bdg) 1755270063Sluigi return EBUSY; 1756270063Sluigi na->na_vp = vpna; 1757270063Sluigi strncpy(na->name, name, sizeof(na->name)); 1758270063Sluigi na->na_hostvp = NULL; 1759270063Sluigi return 0; 1760270063Sluigi} 1761270063Sluigi 1762270063Sluigi/* create a netmap_vp_adapter that describes a VALE port. 1763270063Sluigi * Only persistent VALE ports have a non-null ifp. 1764270063Sluigi */ 1765270063Sluigistatic int 1766270063Sluiginetmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter **ret) 1767270063Sluigi{ 1768259412Sluigi struct netmap_vp_adapter *vpna; 1769259412Sluigi struct netmap_adapter *na; 1770259412Sluigi int error; 1771261909Sluigi u_int npipes = 0; 1772259412Sluigi 1773259412Sluigi vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO); 1774259412Sluigi if (vpna == NULL) 1775259412Sluigi return ENOMEM; 1776259412Sluigi 1777259412Sluigi na = &vpna->up; 1778259412Sluigi 1779259412Sluigi na->ifp = ifp; 1780270063Sluigi strncpy(na->name, nmr->nr_name, sizeof(na->name)); 1781259412Sluigi 1782259412Sluigi /* bound checking */ 1783259412Sluigi na->num_tx_rings = nmr->nr_tx_rings; 1784259412Sluigi nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 1785259412Sluigi nmr->nr_tx_rings = na->num_tx_rings; // write back 1786259412Sluigi na->num_rx_rings = nmr->nr_rx_rings; 1787259412Sluigi nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 1788259412Sluigi nmr->nr_rx_rings = na->num_rx_rings; // write back 1789259412Sluigi nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE, 1790259412Sluigi 1, NM_BDG_MAXSLOTS, NULL); 1791259412Sluigi na->num_tx_desc = nmr->nr_tx_slots; 1792259412Sluigi nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE, 
1793259412Sluigi 1, NM_BDG_MAXSLOTS, NULL); 1794261909Sluigi /* validate number of pipes. We want at least 1, 1795261909Sluigi * but probably can do with some more. 1796261909Sluigi * So let's use 2 as default (when 0 is supplied) 1797261909Sluigi */ 1798261909Sluigi npipes = nmr->nr_arg1; 1799261909Sluigi nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL); 1800261909Sluigi nmr->nr_arg1 = npipes; /* write back */ 1801261909Sluigi /* validate extra bufs */ 1802261909Sluigi nm_bound_var(&nmr->nr_arg3, 0, 0, 1803261909Sluigi 128*NM_BDG_MAXSLOTS, NULL); 1804259412Sluigi na->num_rx_desc = nmr->nr_rx_slots; 1805261909Sluigi vpna->virt_hdr_len = 0; 1806261909Sluigi vpna->mfs = 1514; 1807261909Sluigi /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero?? 1808261909Sluigi vpna->mfs = netmap_buf_size; */ 1809261909Sluigi if (netmap_verbose) 1810261909Sluigi D("max frame size %u", vpna->mfs); 1811259412Sluigi 1812259412Sluigi na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER; 1813270063Sluigi na->nm_txsync = netmap_vp_txsync; 1814270063Sluigi na->nm_rxsync = netmap_vp_rxsync; 1815270063Sluigi na->nm_register = netmap_vp_reg; 1816259412Sluigi na->nm_krings_create = netmap_vp_krings_create; 1817259412Sluigi na->nm_krings_delete = netmap_vp_krings_delete; 1818270063Sluigi na->nm_dtor = netmap_vp_dtor; 1819270063Sluigi na->nm_mem = netmap_mem_private_new(na->name, 1820259412Sluigi na->num_tx_rings, na->num_tx_desc, 1821261909Sluigi na->num_rx_rings, na->num_rx_desc, 1822261909Sluigi nmr->nr_arg3, npipes, &error); 1823261909Sluigi if (na->nm_mem == NULL) 1824261909Sluigi goto err; 1825270063Sluigi na->nm_bdg_attach = netmap_vp_bdg_attach; 1826259412Sluigi /* other nmd fields are set in the common routine */ 1827259412Sluigi error = netmap_attach_common(na); 1828261909Sluigi if (error) 1829261909Sluigi goto err; 1830270063Sluigi *ret = vpna; 1831259412Sluigi return 0; 1832261909Sluigi 1833261909Sluigierr: 1834261909Sluigi if (na->nm_mem != NULL) 1835261909Sluigi 
netmap_mem_private_delete(na->nm_mem); 1836261909Sluigi free(vpna, M_DEVBUF); 1837261909Sluigi return error; 1838259412Sluigi} 1839259412Sluigi 1840270063Sluigi/* Bridge wrapper code (bwrap). 1841270063Sluigi * This is used to connect a non-VALE-port netmap_adapter (hwna) to a 1842270063Sluigi * VALE switch. 1843270063Sluigi * The main task is to swap the meaning of tx and rx rings to match the 1844270063Sluigi * expectations of the VALE switch code (see nm_bdg_flush). 1845270063Sluigi * 1846270063Sluigi * The bwrap works by interposing a netmap_bwrap_adapter between the 1847270063Sluigi * rest of the system and the hwna. The netmap_bwrap_adapter looks like 1848270063Sluigi * a netmap_vp_adapter to the rest the system, but, internally, it 1849270063Sluigi * translates all callbacks to what the hwna expects. 1850270063Sluigi * 1851270063Sluigi * Note that we have to intercept callbacks coming from two sides: 1852270063Sluigi * 1853270063Sluigi * - callbacks coming from the netmap module are intercepted by 1854270063Sluigi * passing around the netmap_bwrap_adapter instead of the hwna 1855270063Sluigi * 1856270063Sluigi * - callbacks coming from outside of the netmap module only know 1857270063Sluigi * about the hwna. This, however, only happens in interrupt 1858270063Sluigi * handlers, where only the hwna->nm_notify callback is called. 1859270063Sluigi * What the bwrap does is to overwrite the hwna->nm_notify callback 1860270063Sluigi * with its own netmap_bwrap_intr_notify. 1861270063Sluigi * XXX This assumes that the hwna->nm_notify callback was the 1862270063Sluigi * standard netmap_notify(), as it is the case for nic adapters. 1863270063Sluigi * Any additional action performed by hwna->nm_notify will not be 1864270063Sluigi * performed by netmap_bwrap_intr_notify. 1865270063Sluigi * 1866270063Sluigi * Additionally, the bwrap can optionally attach the host rings pair 1867270063Sluigi * of the wrapped adapter to a different port of the switch. 
1868270063Sluigi */ 1869260368Sluigi 1870270063Sluigi 1871259412Sluigistatic void 1872259412Sluiginetmap_bwrap_dtor(struct netmap_adapter *na) 1873259412Sluigi{ 1874259412Sluigi struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; 1875259412Sluigi struct netmap_adapter *hwna = bna->hwna; 1876259412Sluigi 1877259412Sluigi ND("na %p", na); 1878270063Sluigi /* drop reference to hwna->ifp. 1879270063Sluigi * If we don't do this, netmap_detach_common(na) 1880270063Sluigi * will think it has set NA(na->ifp) to NULL 1881270063Sluigi */ 1882270063Sluigi na->ifp = NULL; 1883270063Sluigi /* for safety, also drop the possible reference 1884270063Sluigi * in the hostna 1885270063Sluigi */ 1886270063Sluigi bna->host.up.ifp = NULL; 1887259412Sluigi 1888270063Sluigi hwna->nm_mem = bna->save_nmd; 1889259412Sluigi hwna->na_private = NULL; 1890270063Sluigi hwna->na_vp = hwna->na_hostvp = NULL; 1891270063Sluigi hwna->na_flags &= ~NAF_BUSY; 1892259412Sluigi netmap_adapter_put(hwna); 1893259412Sluigi 1894259412Sluigi} 1895259412Sluigi 1896260368Sluigi 1897259412Sluigi/* 1898260368Sluigi * Intr callback for NICs connected to a bridge. 1899260368Sluigi * Simply ignore tx interrupts (maybe we could try to recover space ?) 1900260368Sluigi * and pass received packets from nic to the bridge. 1901260368Sluigi * 1902259412Sluigi * XXX TODO check locking: this is called from the interrupt 1903259412Sluigi * handler so we should make sure that the interface is not 1904259412Sluigi * disconnected while passing down an interrupt. 1905259412Sluigi * 1906260368Sluigi * Note, no user process can access this NIC or the host stack. 1907260368Sluigi * The only part of the ring that is significant are the slots, 1908260368Sluigi * and head/cur/tail are set from the kring as needed 1909260368Sluigi * (part as a receive ring, part as a transmit ring). 1910260368Sluigi * 1911260368Sluigi * callback that overwrites the hwna notify callback. 
1912259412Sluigi * Packets come from the outside or from the host stack and are put on an hwna rx ring. 1913259412Sluigi * The bridge wrapper then sends the packets through the bridge. 1914259412Sluigi */ 1915259412Sluigistatic int 1916259412Sluiginetmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags) 1917259412Sluigi{ 1918259412Sluigi struct netmap_bwrap_adapter *bna = na->na_private; 1919259412Sluigi struct netmap_vp_adapter *hostna = &bna->host; 1920259412Sluigi struct netmap_kring *kring, *bkring; 1921259412Sluigi struct netmap_ring *ring; 1922259412Sluigi int is_host_ring = ring_nr == na->num_rx_rings; 1923259412Sluigi struct netmap_vp_adapter *vpna = &bna->up; 1924259412Sluigi int error = 0; 1925259412Sluigi 1926260368Sluigi if (netmap_verbose) 1927270063Sluigi D("%s %s%d 0x%x", na->name, 1928260368Sluigi (tx == NR_TX ? "TX" : "RX"), ring_nr, flags); 1929259412Sluigi 1930259412Sluigi if (flags & NAF_DISABLE_NOTIFY) { 1931270063Sluigi /* the enabled/disabled state of the ring has changed, 1932270063Sluigi * propagate the info to the wrapper (with tx/rx swapped) 1933270063Sluigi */ 1934270063Sluigi if (tx == NR_TX) { 1935270063Sluigi netmap_set_rxring(&vpna->up, ring_nr, 1936270063Sluigi na->tx_rings[ring_nr].nkr_stopped); 1937270063Sluigi } else { 1938270063Sluigi netmap_set_txring(&vpna->up, ring_nr, 1939270063Sluigi na->rx_rings[ring_nr].nkr_stopped); 1940270063Sluigi } 1941259412Sluigi return 0; 1942259412Sluigi } 1943259412Sluigi 1944270063Sluigi if (!nm_netmap_on(na)) 1945259412Sluigi return 0; 1946259412Sluigi 1947260368Sluigi /* we only care about receive interrupts */ 1948259412Sluigi if (tx == NR_TX) 1949259412Sluigi return 0; 1950259412Sluigi 1951259412Sluigi kring = &na->rx_rings[ring_nr]; 1952259412Sluigi ring = kring->ring; 1953259412Sluigi 1954259412Sluigi /* make sure the ring is not disabled */ 1955259412Sluigi if (nm_kr_tryget(kring)) 1956259412Sluigi return 0; 1957259412Sluigi 1958259412Sluigi if 
(is_host_ring && hostna->na_bdg == NULL) { 1959259412Sluigi error = bna->save_notify(na, ring_nr, tx, flags); 1960259412Sluigi goto put_out; 1961259412Sluigi } 1962259412Sluigi 1963260368Sluigi /* Here we expect ring->head = ring->cur = ring->tail 1964260368Sluigi * because everything has been released from the previous round. 1965260368Sluigi * However the ring is shared and we might have info from 1966260368Sluigi * the wrong side (the tx ring). Hence we overwrite with 1967260368Sluigi * the info from the rx kring. 1968260368Sluigi */ 1969260368Sluigi if (netmap_verbose) 1970270063Sluigi D("%s head %d cur %d tail %d (kring %d %d %d)", na->name, 1971260368Sluigi ring->head, ring->cur, ring->tail, 1972260368Sluigi kring->rhead, kring->rcur, kring->rtail); 1973260368Sluigi 1974260368Sluigi ring->head = kring->rhead; 1975260368Sluigi ring->cur = kring->rcur; 1976260368Sluigi ring->tail = kring->rtail; 1977260368Sluigi 1978259412Sluigi if (is_host_ring) { 1979259412Sluigi vpna = hostna; 1980259412Sluigi ring_nr = 0; 1981267128Sluigi } 1982261909Sluigi /* simulate a user wakeup on the rx ring */ 1983261909Sluigi /* fetch packets that have arrived. 1984261909Sluigi * XXX maybe do this in a loop ? 1985261909Sluigi */ 1986261909Sluigi error = kring->nm_sync(kring, 0); 1987261909Sluigi if (error) 1988261909Sluigi goto put_out; 1989260368Sluigi if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) { 1990259412Sluigi D("how strange, interrupt with no packets on %s", 1991270063Sluigi na->name); 1992259412Sluigi goto put_out; 1993259412Sluigi } 1994260368Sluigi 1995260368Sluigi /* new packets are ring->cur to ring->tail, and the bkring 1996260368Sluigi * had hwcur == ring->cur. So advance ring->cur to ring->tail 1997260368Sluigi * to push all packets out. 
1998260368Sluigi */ 1999260368Sluigi ring->head = ring->cur = ring->tail; 2000260368Sluigi 2001260368Sluigi /* also set tail to what the bwrap expects */ 2002260368Sluigi bkring = &vpna->up.tx_rings[ring_nr]; 2003260368Sluigi ring->tail = bkring->nr_hwtail; // rtail too ? 2004260368Sluigi 2005260368Sluigi /* pass packets to the switch */ 2006260368Sluigi nm_txsync_prologue(bkring); // XXX error checking ? 2007270063Sluigi netmap_vp_txsync(bkring, flags); 2008259412Sluigi 2009260368Sluigi /* mark all buffers as released on this ring */ 2010260368Sluigi ring->head = ring->cur = kring->nr_hwtail; 2011260368Sluigi ring->tail = kring->rtail; 2012260368Sluigi /* another call to actually release the buffers */ 2013260368Sluigi if (!is_host_ring) { 2014261909Sluigi error = kring->nm_sync(kring, 0); 2015260368Sluigi } else { 2016260368Sluigi /* mark all packets as released, as in the 2017260368Sluigi * second part of netmap_rxsync_from_host() 2018260368Sluigi */ 2019260368Sluigi kring->nr_hwcur = kring->nr_hwtail; 2020260368Sluigi nm_rxsync_finalize(kring); 2021260368Sluigi } 2022259412Sluigi 2023259412Sluigiput_out: 2024259412Sluigi nm_kr_put(kring); 2025259412Sluigi return error; 2026259412Sluigi} 2027259412Sluigi 2028260368Sluigi 2029270063Sluigi/* nm_register callback for bwrap */ 2030259412Sluigistatic int 2031259412Sluiginetmap_bwrap_register(struct netmap_adapter *na, int onoff) 2032259412Sluigi{ 2033259412Sluigi struct netmap_bwrap_adapter *bna = 2034259412Sluigi (struct netmap_bwrap_adapter *)na; 2035259412Sluigi struct netmap_adapter *hwna = bna->hwna; 2036259412Sluigi struct netmap_vp_adapter *hostna = &bna->host; 2037259412Sluigi int error; 2038259412Sluigi 2039270063Sluigi ND("%s %s", na->name, onoff ? "on" : "off"); 2040259412Sluigi 2041259412Sluigi if (onoff) { 2042259412Sluigi int i; 2043259412Sluigi 2044270063Sluigi /* netmap_do_regif has been called on the bwrap na. 
2045270063Sluigi * We need to pass the information about the 2046270063Sluigi * memory allocator down to the hwna before 2047270063Sluigi * putting it in netmap mode 2048270063Sluigi */ 2049259412Sluigi hwna->na_lut = na->na_lut; 2050259412Sluigi hwna->na_lut_objtotal = na->na_lut_objtotal; 2051270063Sluigi hwna->na_lut_objsize = na->na_lut_objsize; 2052259412Sluigi 2053259412Sluigi if (hostna->na_bdg) { 2054270063Sluigi /* if the host rings have been attached to switch, 2055270063Sluigi * we need to copy the memory allocator information 2056270063Sluigi * in the hostna also 2057270063Sluigi */ 2058259412Sluigi hostna->up.na_lut = na->na_lut; 2059259412Sluigi hostna->up.na_lut_objtotal = na->na_lut_objtotal; 2060270063Sluigi hostna->up.na_lut_objsize = na->na_lut_objsize; 2061259412Sluigi } 2062259412Sluigi 2063260516Sluigi /* cross-link the netmap rings 2064260516Sluigi * The original number of rings comes from hwna, 2065260516Sluigi * rx rings on one side equals tx rings on the other. 
2066270063Sluigi * We need to do this now, after the initialization 2067270063Sluigi * of the kring->ring pointers 2068260516Sluigi */ 2069261909Sluigi for (i = 0; i < na->num_rx_rings + 1; i++) { 2070259412Sluigi hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots; 2071259412Sluigi hwna->tx_rings[i].ring = na->rx_rings[i].ring; 2072259412Sluigi } 2073261909Sluigi for (i = 0; i < na->num_tx_rings + 1; i++) { 2074259412Sluigi hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots; 2075259412Sluigi hwna->rx_rings[i].ring = na->tx_rings[i].ring; 2076259412Sluigi } 2077259412Sluigi } 2078259412Sluigi 2079270063Sluigi /* forward the request to the hwna */ 2080270063Sluigi error = hwna->nm_register(hwna, onoff); 2081270063Sluigi if (error) 2082270063Sluigi return error; 2083259412Sluigi 2084270063Sluigi /* impersonate a netmap_vp_adapter */ 2085270063Sluigi netmap_vp_reg(na, onoff); 2086270063Sluigi if (hostna->na_bdg) 2087270063Sluigi netmap_vp_reg(&hostna->up, onoff); 2088259412Sluigi 2089259412Sluigi if (onoff) { 2090270063Sluigi /* intercept the hwna nm_nofify callback */ 2091259412Sluigi bna->save_notify = hwna->nm_notify; 2092259412Sluigi hwna->nm_notify = netmap_bwrap_intr_notify; 2093259412Sluigi } else { 2094259412Sluigi hwna->nm_notify = bna->save_notify; 2095259412Sluigi hwna->na_lut = NULL; 2096259412Sluigi hwna->na_lut_objtotal = 0; 2097270063Sluigi hwna->na_lut_objsize = 0; 2098259412Sluigi } 2099259412Sluigi 2100259412Sluigi return 0; 2101259412Sluigi} 2102259412Sluigi 2103270063Sluigi/* nm_config callback for bwrap */ 2104259412Sluigistatic int 2105259412Sluiginetmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, 2106259412Sluigi u_int *rxr, u_int *rxd) 2107259412Sluigi{ 2108259412Sluigi struct netmap_bwrap_adapter *bna = 2109259412Sluigi (struct netmap_bwrap_adapter *)na; 2110259412Sluigi struct netmap_adapter *hwna = bna->hwna; 2111259412Sluigi 2112259412Sluigi /* forward the request */ 2113259412Sluigi 
netmap_update_config(hwna); 2114259412Sluigi /* swap the results */ 2115259412Sluigi *txr = hwna->num_rx_rings; 2116259412Sluigi *txd = hwna->num_rx_desc; 2117259412Sluigi *rxr = hwna->num_tx_rings; 2118259412Sluigi *rxd = hwna->num_rx_desc; 2119259412Sluigi 2120259412Sluigi return 0; 2121259412Sluigi} 2122259412Sluigi 2123260368Sluigi 2124270063Sluigi/* nm_krings_create callback for bwrap */ 2125259412Sluigistatic int 2126259412Sluiginetmap_bwrap_krings_create(struct netmap_adapter *na) 2127259412Sluigi{ 2128259412Sluigi struct netmap_bwrap_adapter *bna = 2129259412Sluigi (struct netmap_bwrap_adapter *)na; 2130259412Sluigi struct netmap_adapter *hwna = bna->hwna; 2131259412Sluigi struct netmap_adapter *hostna = &bna->host.up; 2132259412Sluigi int error; 2133259412Sluigi 2134270063Sluigi ND("%s", na->name); 2135259412Sluigi 2136270063Sluigi /* impersonate a netmap_vp_adapter */ 2137259412Sluigi error = netmap_vp_krings_create(na); 2138259412Sluigi if (error) 2139259412Sluigi return error; 2140259412Sluigi 2141270063Sluigi /* also create the hwna krings */ 2142259412Sluigi error = hwna->nm_krings_create(hwna); 2143259412Sluigi if (error) { 2144259412Sluigi netmap_vp_krings_delete(na); 2145259412Sluigi return error; 2146259412Sluigi } 2147270063Sluigi /* the connection between the bwrap krings and the hwna krings 2148270063Sluigi * will be perfomed later, in the nm_register callback, since 2149270063Sluigi * now the kring->ring pointers have not been initialized yet 2150270063Sluigi */ 2151259412Sluigi 2152261909Sluigi if (na->na_flags & NAF_HOST_RINGS) { 2153270063Sluigi /* the hostna rings are the host rings of the bwrap. 
2154270063Sluigi * The corresponding krings must point back to the 2155270063Sluigi * hostna 2156270063Sluigi */ 2157261909Sluigi hostna->tx_rings = na->tx_rings + na->num_tx_rings; 2158270063Sluigi hostna->tx_rings[0].na = hostna; 2159261909Sluigi hostna->rx_rings = na->rx_rings + na->num_rx_rings; 2160270063Sluigi hostna->rx_rings[0].na = hostna; 2161261909Sluigi } 2162259412Sluigi 2163259412Sluigi return 0; 2164259412Sluigi} 2165259412Sluigi 2166260368Sluigi 2167259412Sluigistatic void 2168259412Sluiginetmap_bwrap_krings_delete(struct netmap_adapter *na) 2169259412Sluigi{ 2170259412Sluigi struct netmap_bwrap_adapter *bna = 2171259412Sluigi (struct netmap_bwrap_adapter *)na; 2172259412Sluigi struct netmap_adapter *hwna = bna->hwna; 2173259412Sluigi 2174270063Sluigi ND("%s", na->name); 2175259412Sluigi 2176259412Sluigi hwna->nm_krings_delete(hwna); 2177259412Sluigi netmap_vp_krings_delete(na); 2178259412Sluigi} 2179259412Sluigi 2180260368Sluigi 2181259412Sluigi/* notify method for the bridge-->hwna direction */ 2182259412Sluigistatic int 2183259412Sluiginetmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags) 2184259412Sluigi{ 2185259412Sluigi struct netmap_bwrap_adapter *bna = 2186259412Sluigi (struct netmap_bwrap_adapter *)na; 2187259412Sluigi struct netmap_adapter *hwna = bna->hwna; 2188259412Sluigi struct netmap_kring *kring, *hw_kring; 2189259412Sluigi struct netmap_ring *ring; 2190260368Sluigi u_int lim; 2191259412Sluigi int error = 0; 2192259412Sluigi 2193259412Sluigi if (tx == NR_TX) 2194260700Sluigi return EINVAL; 2195259412Sluigi 2196259412Sluigi kring = &na->rx_rings[ring_n]; 2197259412Sluigi hw_kring = &hwna->tx_rings[ring_n]; 2198259412Sluigi ring = kring->ring; 2199259412Sluigi lim = kring->nkr_num_slots - 1; 2200259412Sluigi 2201270063Sluigi if (!nm_netmap_on(hwna)) 2202259412Sluigi return 0; 2203261909Sluigi mtx_lock(&kring->q_lock); 2204260368Sluigi /* first step: simulate a user wakeup on the rx ring */ 
2205270063Sluigi netmap_vp_rxsync_locked(kring, flags); 2206260368Sluigi ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 2207270063Sluigi na->name, ring_n, 2208260368Sluigi kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 2209260368Sluigi ring->head, ring->cur, ring->tail, 2210260368Sluigi hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail); 2211260368Sluigi /* second step: the simulated user consumes all new packets */ 2212260368Sluigi ring->head = ring->cur = ring->tail; 2213260368Sluigi 2214260368Sluigi /* third step: the new packets are sent on the tx ring 2215260368Sluigi * (which is actually the same ring) 2216260368Sluigi */ 2217260368Sluigi /* set tail to what the hw expects */ 2218260368Sluigi ring->tail = hw_kring->rtail; 2219261909Sluigi nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ? 2220261909Sluigi error = hw_kring->nm_sync(hw_kring, flags); 2221260368Sluigi 2222260368Sluigi /* fourth step: now we are back the rx ring */ 2223260368Sluigi /* claim ownership on all hw owned bufs */ 2224260368Sluigi ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */ 2225260368Sluigi ring->tail = kring->rtail; /* restore saved value of tail, for safety */ 2226260368Sluigi 2227260368Sluigi /* fifth step: the user goes to sleep again, causing another rxsync */ 2228270063Sluigi netmap_vp_rxsync_locked(kring, flags); 2229260368Sluigi ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 2230270063Sluigi na->name, ring_n, 2231260368Sluigi kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 2232260368Sluigi ring->head, ring->cur, ring->tail, 2233260368Sluigi hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); 2234261909Sluigi mtx_unlock(&kring->q_lock); 2235259412Sluigi return error; 2236259412Sluigi} 2237259412Sluigi 2238260368Sluigi 2239270063Sluigi/* notify method for the bridge-->host-rings path */ 2240259412Sluigistatic int 
2241259412Sluiginetmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags) 2242259412Sluigi{ 2243259412Sluigi struct netmap_bwrap_adapter *bna = na->na_private; 2244259412Sluigi struct netmap_adapter *port_na = &bna->up.up; 2245259412Sluigi if (tx == NR_TX || ring_n != 0) 2246260700Sluigi return EINVAL; 2247259412Sluigi return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags); 2248259412Sluigi} 2249259412Sluigi 2250260368Sluigi 2251270063Sluigi/* nm_bdg_ctl callback for the bwrap. 2252270063Sluigi * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd]. 2253270063Sluigi * On attach, it needs to provide a fake netmap_priv_d structure and 2254270063Sluigi * perform a netmap_do_regif() on the bwrap. This will put both the 2255270063Sluigi * bwrap and the hwna in netmap mode, with the netmap rings shared 2256270063Sluigi * and cross linked. Moroever, it will start intercepting interrupts 2257270063Sluigi * directed to hwna. 2258270063Sluigi */ 2259259412Sluigistatic int 2260270063Sluiginetmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) 2261259412Sluigi{ 2262270063Sluigi struct netmap_priv_d *npriv; 2263270063Sluigi struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; 2264270063Sluigi struct netmap_if *nifp; 2265270063Sluigi int error = 0; 2266270063Sluigi 2267270063Sluigi if (attach) { 2268270063Sluigi if (NETMAP_OWNED_BY_ANY(na)) { 2269270063Sluigi return EBUSY; 2270270063Sluigi } 2271270063Sluigi if (bna->na_kpriv) { 2272270063Sluigi /* nothing to do */ 2273270063Sluigi return 0; 2274270063Sluigi } 2275270063Sluigi npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO); 2276270063Sluigi if (npriv == NULL) 2277270063Sluigi return ENOMEM; 2278270063Sluigi nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error); 2279270063Sluigi if (!nifp) { 2280270063Sluigi bzero(npriv, sizeof(*npriv)); 2281270063Sluigi free(npriv, M_DEVBUF); 2282270063Sluigi 
return error; 2283270063Sluigi } 2284270063Sluigi bna->na_kpriv = npriv; 2285270063Sluigi na->na_flags |= NAF_BUSY; 2286270063Sluigi } else { 2287270063Sluigi int last_instance; 2288270063Sluigi 2289270063Sluigi if (na->active_fds == 0) /* not registered */ 2290270063Sluigi return EINVAL; 2291270063Sluigi last_instance = netmap_dtor_locked(bna->na_kpriv); 2292270063Sluigi if (!last_instance) { 2293270063Sluigi D("--- error, trying to detach an entry with active mmaps"); 2294270063Sluigi error = EINVAL; 2295270063Sluigi } else { 2296270063Sluigi struct nm_bridge *b = bna->up.na_bdg, 2297270063Sluigi *bh = bna->host.na_bdg; 2298270063Sluigi npriv = bna->na_kpriv; 2299270063Sluigi bna->na_kpriv = NULL; 2300270063Sluigi D("deleting priv"); 2301270063Sluigi 2302270063Sluigi bzero(npriv, sizeof(*npriv)); 2303270063Sluigi free(npriv, M_DEVBUF); 2304270063Sluigi if (b) { 2305270063Sluigi /* XXX the bwrap dtor should take care 2306270063Sluigi * of this (2014-06-16) 2307270063Sluigi */ 2308270063Sluigi netmap_bdg_detach_common(b, bna->up.bdg_port, 2309270063Sluigi (bh ? 
bna->host.bdg_port : -1)); 2310270063Sluigi } 2311270063Sluigi na->na_flags &= ~NAF_BUSY; 2312270063Sluigi } 2313270063Sluigi } 2314270063Sluigi return error; 2315270063Sluigi 2316270063Sluigi} 2317270063Sluigi 2318270063Sluigi/* attach a bridge wrapper to the 'real' device */ 2319270063Sluigiint 2320270063Sluiginetmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) 2321270063Sluigi{ 2322259412Sluigi struct netmap_bwrap_adapter *bna; 2323270063Sluigi struct netmap_adapter *na = NULL; 2324270063Sluigi struct netmap_adapter *hostna = NULL; 2325270063Sluigi int error = 0; 2326259412Sluigi 2327270063Sluigi /* make sure the NIC is not already in use */ 2328270063Sluigi if (NETMAP_OWNED_BY_ANY(hwna)) { 2329270063Sluigi D("NIC %s busy, cannot attach to bridge", hwna->name); 2330270063Sluigi return EBUSY; 2331270063Sluigi } 2332259412Sluigi 2333259412Sluigi bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO); 2334270063Sluigi if (bna == NULL) { 2335259412Sluigi return ENOMEM; 2336270063Sluigi } 2337259412Sluigi 2338259412Sluigi na = &bna->up.up; 2339270063Sluigi strncpy(na->name, nr_name, sizeof(na->name)); 2340259412Sluigi /* fill the ring data for the bwrap adapter with rx/tx meanings 2341259412Sluigi * swapped. The real cross-linking will be done during register, 2342259412Sluigi * when all the krings will have been created. 
2343259412Sluigi */ 2344259412Sluigi na->num_rx_rings = hwna->num_tx_rings; 2345259412Sluigi na->num_tx_rings = hwna->num_rx_rings; 2346259412Sluigi na->num_tx_desc = hwna->num_rx_desc; 2347259412Sluigi na->num_rx_desc = hwna->num_tx_desc; 2348259412Sluigi na->nm_dtor = netmap_bwrap_dtor; 2349259412Sluigi na->nm_register = netmap_bwrap_register; 2350259412Sluigi // na->nm_txsync = netmap_bwrap_txsync; 2351259412Sluigi // na->nm_rxsync = netmap_bwrap_rxsync; 2352259412Sluigi na->nm_config = netmap_bwrap_config; 2353259412Sluigi na->nm_krings_create = netmap_bwrap_krings_create; 2354259412Sluigi na->nm_krings_delete = netmap_bwrap_krings_delete; 2355259412Sluigi na->nm_notify = netmap_bwrap_notify; 2356270063Sluigi na->nm_bdg_ctl = netmap_bwrap_bdg_ctl; 2357270063Sluigi na->pdev = hwna->pdev; 2358270063Sluigi na->nm_mem = netmap_mem_private_new(na->name, 2359270063Sluigi na->num_tx_rings, na->num_tx_desc, 2360270063Sluigi na->num_rx_rings, na->num_rx_desc, 2361270063Sluigi 0, 0, &error); 2362270063Sluigi na->na_flags |= NAF_MEM_OWNER; 2363270063Sluigi if (na->nm_mem == NULL) 2364270063Sluigi goto err_put; 2365259412Sluigi bna->up.retry = 1; /* XXX maybe this should depend on the hwna */ 2366259412Sluigi 2367259412Sluigi bna->hwna = hwna; 2368259412Sluigi netmap_adapter_get(hwna); 2369259412Sluigi hwna->na_private = bna; /* weak reference */ 2370270063Sluigi hwna->na_vp = &bna->up; 2371270063Sluigi 2372261909Sluigi if (hwna->na_flags & NAF_HOST_RINGS) { 2373270063Sluigi if (hwna->na_flags & NAF_SW_ONLY) 2374270063Sluigi na->na_flags |= NAF_SW_ONLY; 2375261909Sluigi na->na_flags |= NAF_HOST_RINGS; 2376261909Sluigi hostna = &bna->host.up; 2377270063Sluigi snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name); 2378261909Sluigi hostna->ifp = hwna->ifp; 2379261909Sluigi hostna->num_tx_rings = 1; 2380261909Sluigi hostna->num_tx_desc = hwna->num_rx_desc; 2381261909Sluigi hostna->num_rx_rings = 1; 2382261909Sluigi hostna->num_rx_desc = hwna->num_tx_desc; 
2383261909Sluigi // hostna->nm_txsync = netmap_bwrap_host_txsync; 2384261909Sluigi // hostna->nm_rxsync = netmap_bwrap_host_rxsync; 2385261909Sluigi hostna->nm_notify = netmap_bwrap_host_notify; 2386261909Sluigi hostna->nm_mem = na->nm_mem; 2387261909Sluigi hostna->na_private = bna; 2388270063Sluigi hostna->na_vp = &bna->up; 2389270063Sluigi na->na_hostvp = hwna->na_hostvp = 2390270063Sluigi hostna->na_hostvp = &bna->host; 2391270063Sluigi hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */ 2392261909Sluigi } 2393259412Sluigi 2394260368Sluigi ND("%s<->%s txr %d txd %d rxr %d rxd %d", 2395270063Sluigi na->name, ifp->if_xname, 2396259412Sluigi na->num_tx_rings, na->num_tx_desc, 2397259412Sluigi na->num_rx_rings, na->num_rx_desc); 2398259412Sluigi 2399259412Sluigi error = netmap_attach_common(na); 2400259412Sluigi if (error) { 2401270063Sluigi goto err_free; 2402259412Sluigi } 2403270063Sluigi /* make bwrap ifp point to the real ifp 2404270063Sluigi * NOTE: netmap_attach_common() interprets a non-NULL na->ifp 2405270063Sluigi * as a request to make the ifp point to the na. 
Since we 2406270063Sluigi * do not want to change the na already pointed to by hwna->ifp, 2407270063Sluigi * the following assignment has to be delayed until now 2408270063Sluigi */ 2409270063Sluigi na->ifp = hwna->ifp; 2410270063Sluigi hwna->na_flags |= NAF_BUSY; 2411270063Sluigi /* make hwna point to the allocator we are actually using, 2412270063Sluigi * so that monitors will be able to find it 2413270063Sluigi */ 2414270063Sluigi bna->save_nmd = hwna->nm_mem; 2415270063Sluigi hwna->nm_mem = na->nm_mem; 2416259412Sluigi return 0; 2417270063Sluigi 2418270063Sluigierr_free: 2419270063Sluigi netmap_mem_private_delete(na->nm_mem); 2420270063Sluigierr_put: 2421270063Sluigi hwna->na_vp = hwna->na_hostvp = NULL; 2422270063Sluigi netmap_adapter_put(hwna); 2423270063Sluigi free(bna, M_DEVBUF); 2424270063Sluigi return error; 2425270063Sluigi 2426259412Sluigi} 2427259412Sluigi 2428260368Sluigi 2429259412Sluigivoid 2430259412Sluiginetmap_init_bridges(void) 2431259412Sluigi{ 2432259412Sluigi int i; 2433259412Sluigi bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */ 2434259412Sluigi for (i = 0; i < NM_BRIDGES; i++) 2435259412Sluigi BDG_RWINIT(&nm_bridges[i]); 2436259412Sluigi} 2437259412Sluigi#endif /* WITH_VALE */ 2438