netmap_vale.c revision 331722
1331722Seadler/* 2260368Sluigi * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. 3259412Sluigi * 4259412Sluigi * Redistribution and use in source and binary forms, with or without 5259412Sluigi * modification, are permitted provided that the following conditions 6259412Sluigi * are met: 7259412Sluigi * 1. Redistributions of source code must retain the above copyright 8259412Sluigi * notice, this list of conditions and the following disclaimer. 9259412Sluigi * 2. Redistributions in binary form must reproduce the above copyright 10259412Sluigi * notice, this list of conditions and the following disclaimer in the 11259412Sluigi * documentation and/or other materials provided with the distribution. 12259412Sluigi * 13259412Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14259412Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15259412Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16259412Sluigi * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17259412Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18259412Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19259412Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20259412Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21259412Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22259412Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23259412Sluigi * SUCH DAMAGE. 24259412Sluigi */ 25259412Sluigi 26259412Sluigi 27259412Sluigi/* 28259412Sluigi * This module implements the VALE switch for netmap 29259412Sluigi 30259412Sluigi--- VALE SWITCH --- 31259412Sluigi 32259412SluigiNMG_LOCK() serializes all modifications to switches and ports. 
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring or deleting a new port, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch)

 */

/*
 * OS-specific code that is used only within this file.
54259412Sluigi * Other OS-specific code that must be accessed by drivers 55259412Sluigi * is present in netmap_kern.h 56259412Sluigi */ 57259412Sluigi 58259412Sluigi#if defined(__FreeBSD__) 59259412Sluigi#include <sys/cdefs.h> /* prerequisite */ 60259412Sluigi__FBSDID("$FreeBSD: stable/11/sys/dev/netmap/netmap_vale.c 331722 2018-03-29 02:50:57Z eadler $"); 61259412Sluigi 62259412Sluigi#include <sys/types.h> 63259412Sluigi#include <sys/errno.h> 64259412Sluigi#include <sys/param.h> /* defines used in kernel.h */ 65259412Sluigi#include <sys/kernel.h> /* types used in module initialization */ 66259412Sluigi#include <sys/conf.h> /* cdevsw struct, UID, GID */ 67259412Sluigi#include <sys/sockio.h> 68259412Sluigi#include <sys/socketvar.h> /* struct socket */ 69259412Sluigi#include <sys/malloc.h> 70259412Sluigi#include <sys/poll.h> 71259412Sluigi#include <sys/rwlock.h> 72259412Sluigi#include <sys/socket.h> /* sockaddrs */ 73259412Sluigi#include <sys/selinfo.h> 74259412Sluigi#include <sys/sysctl.h> 75259412Sluigi#include <net/if.h> 76259412Sluigi#include <net/if_var.h> 77259412Sluigi#include <net/bpf.h> /* BIOCIMMEDIATE */ 78259412Sluigi#include <machine/bus.h> /* bus_dmamap_* */ 79259412Sluigi#include <sys/endian.h> 80259412Sluigi#include <sys/refcount.h> 81259412Sluigi 82259412Sluigi 83259412Sluigi#define BDG_RWLOCK_T struct rwlock // struct rwlock 84259412Sluigi 85259412Sluigi#define BDG_RWINIT(b) \ 86259412Sluigi rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS) 87259412Sluigi#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock) 88259412Sluigi#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock) 89259412Sluigi#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock) 90259412Sluigi#define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock) 91259412Sluigi#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock) 92259412Sluigi#define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock) 93259412Sluigi 94259412Sluigi 95259412Sluigi#elif defined(linux) 96259412Sluigi 97259412Sluigi#include "bsd_glue.h" 98259412Sluigi 
99259412Sluigi#elif defined(__APPLE__) 100259412Sluigi 101259412Sluigi#warning OSX support is only partial 102259412Sluigi#include "osx_glue.h" 103259412Sluigi 104259412Sluigi#else 105259412Sluigi 106259412Sluigi#error Unsupported platform 107259412Sluigi 108259412Sluigi#endif /* unsupported */ 109259412Sluigi 110259412Sluigi/* 111259412Sluigi * common headers 112259412Sluigi */ 113259412Sluigi 114259412Sluigi#include <net/netmap.h> 115259412Sluigi#include <dev/netmap/netmap_kern.h> 116259412Sluigi#include <dev/netmap/netmap_mem2.h> 117259412Sluigi 118259412Sluigi#ifdef WITH_VALE 119259412Sluigi 120259412Sluigi/* 121259412Sluigi * system parameters (most of them in netmap_kern.h) 122259412Sluigi * NM_NAME prefix for switch port names, default "vale" 123259412Sluigi * NM_BDG_MAXPORTS number of ports 124259412Sluigi * NM_BRIDGES max number of switches in the system. 125259412Sluigi * XXX should become a sysctl or tunable 126259412Sluigi * 127259412Sluigi * Switch ports are named valeX:Y where X is the switch name and Y 128259412Sluigi * is the port. If Y matches a physical interface name, the port is 129259412Sluigi * connected to a physical device. 130259412Sluigi * 131259412Sluigi * Unlike physical interfaces, switch ports use their own memory region 132259412Sluigi * for rings and buffers. 133259412Sluigi * The virtual interfaces use per-queue lock instead of core lock. 134259412Sluigi * In the tx loop, we aggregate traffic in batches to make all operations 135259412Sluigi * faster. The batch size is bridge_batch. 136259412Sluigi */ 137259412Sluigi#define NM_BDG_MAXRINGS 16 /* XXX unclear how many. 
*/ 138259412Sluigi#define NM_BDG_MAXSLOTS 4096 /* XXX same as above */ 139259412Sluigi#define NM_BRIDGE_RINGSIZE 1024 /* in the device */ 140259412Sluigi#define NM_BDG_HASH 1024 /* forwarding table entries */ 141259412Sluigi#define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */ 142259412Sluigi#define NM_MULTISEG 64 /* max size of a chain of bufs */ 143259412Sluigi/* actual size of the tables */ 144259412Sluigi#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG) 145259412Sluigi/* NM_FT_NULL terminates a list of slots in the ft */ 146259412Sluigi#define NM_FT_NULL NM_BDG_BATCH_MAX 147259412Sluigi#define NM_BRIDGES 8 /* number of bridges */ 148259412Sluigi 149259412Sluigi 150259412Sluigi/* 151259412Sluigi * bridge_batch is set via sysctl to the max batch size to be 152259412Sluigi * used in the bridge. The actual value may be larger as the 153259412Sluigi * last packet in the block may overflow the size. 154259412Sluigi */ 155259412Sluigiint bridge_batch = NM_BDG_BATCH; /* bridge batch size */ 156259412SluigiSYSCTL_DECL(_dev_netmap); 157259412SluigiSYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , ""); 158259412Sluigi 159259412Sluigi 160270063Sluigistatic int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **); 161270063Sluigistatic int netmap_vp_reg(struct netmap_adapter *na, int onoff); 162259412Sluigistatic int netmap_bwrap_register(struct netmap_adapter *, int onoff); 163259412Sluigi 164259412Sluigi/* 165259412Sluigi * For each output interface, nm_bdg_q is used to construct a list. 166259412Sluigi * bq_len is the number of output buffers (we can have coalescing 167259412Sluigi * during the copy). 
168259412Sluigi */ 169259412Sluigistruct nm_bdg_q { 170259412Sluigi uint16_t bq_head; 171259412Sluigi uint16_t bq_tail; 172259412Sluigi uint32_t bq_len; /* number of buffers */ 173259412Sluigi}; 174259412Sluigi 175259412Sluigi/* XXX revise this */ 176259412Sluigistruct nm_hash_ent { 177259412Sluigi uint64_t mac; /* the top 2 bytes are the epoch */ 178259412Sluigi uint64_t ports; 179259412Sluigi}; 180259412Sluigi 181259412Sluigi/* 182259412Sluigi * nm_bridge is a descriptor for a VALE switch. 183259412Sluigi * Interfaces for a bridge are all in bdg_ports[]. 184259412Sluigi * The array has fixed size, an empty entry does not terminate 185259412Sluigi * the search, but lookups only occur on attach/detach so we 186259412Sluigi * don't mind if they are slow. 187259412Sluigi * 188259412Sluigi * The bridge is non blocking on the transmit ports: excess 189259412Sluigi * packets are dropped if there is no room on the output port. 190259412Sluigi * 191259412Sluigi * bdg_lock protects accesses to the bdg_ports array. 192259412Sluigi * This is a rw lock (or equivalent). 193259412Sluigi */ 194259412Sluigistruct nm_bridge { 195259412Sluigi /* XXX what is the proper alignment/layout ? */ 196259412Sluigi BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */ 197259412Sluigi int bdg_namelen; 198259412Sluigi uint32_t bdg_active_ports; /* 0 means free */ 199259412Sluigi char bdg_basename[IFNAMSIZ]; 200259412Sluigi 201259412Sluigi /* Indexes of active ports (up to active_ports) 202259412Sluigi * and all other remaining ports. 203259412Sluigi */ 204259412Sluigi uint8_t bdg_port_index[NM_BDG_MAXPORTS]; 205259412Sluigi 206259412Sluigi struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS]; 207259412Sluigi 208259412Sluigi 209259412Sluigi /* 210259412Sluigi * The function to decide the destination port. 211259412Sluigi * It returns either of an index of the destination port, 212259412Sluigi * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to 213259412Sluigi * forward this packet. 
ring_nr is the source ring index, and the 214259412Sluigi * function may overwrite this value to forward this packet to a 215259412Sluigi * different ring index. 216259412Sluigi * This function must be set by netmap_bdgctl(). 217259412Sluigi */ 218270063Sluigi struct netmap_bdg_ops bdg_ops; 219259412Sluigi 220259412Sluigi /* the forwarding table, MAC+ports. 221259412Sluigi * XXX should be changed to an argument to be passed to 222259412Sluigi * the lookup function, and allocated on attach 223259412Sluigi */ 224259412Sluigi struct nm_hash_ent ht[NM_BDG_HASH]; 225285349Sluigi 226285349Sluigi#ifdef CONFIG_NET_NS 227285349Sluigi struct net *ns; 228285349Sluigi#endif /* CONFIG_NET_NS */ 229259412Sluigi}; 230259412Sluigi 231270063Sluigiconst char* 232270063Sluiginetmap_bdg_name(struct netmap_vp_adapter *vp) 233270063Sluigi{ 234270063Sluigi struct nm_bridge *b = vp->na_bdg; 235270063Sluigi if (b == NULL) 236270063Sluigi return NULL; 237270063Sluigi return b->bdg_basename; 238270063Sluigi} 239259412Sluigi 240270063Sluigi 241285349Sluigi#ifndef CONFIG_NET_NS 242259412Sluigi/* 243259412Sluigi * XXX in principle nm_bridges could be created dynamically 244259412Sluigi * Right now we have a static array and deletions are protected 245259412Sluigi * by an exclusive lock. 246259412Sluigi */ 247285349Sluigistruct nm_bridge *nm_bridges; 248285349Sluigi#endif /* !CONFIG_NET_NS */ 249259412Sluigi 250259412Sluigi 251259412Sluigi/* 252259412Sluigi * this is a slightly optimized copy routine which rounds 253259412Sluigi * to multiple of 64 bytes and is often faster than dealing 254259412Sluigi * with other odd sizes. We assume there is enough room 255259412Sluigi * in the source and destination buffers. 256259412Sluigi * 257259412Sluigi * XXX only for multiples of 64 bytes, non overlapped. 
258259412Sluigi */ 259259412Sluigistatic inline void 260259412Sluigipkt_copy(void *_src, void *_dst, int l) 261259412Sluigi{ 262259412Sluigi uint64_t *src = _src; 263259412Sluigi uint64_t *dst = _dst; 264259412Sluigi if (unlikely(l >= 1024)) { 265259412Sluigi memcpy(dst, src, l); 266259412Sluigi return; 267259412Sluigi } 268259412Sluigi for (; likely(l > 0); l-=64) { 269259412Sluigi *dst++ = *src++; 270259412Sluigi *dst++ = *src++; 271259412Sluigi *dst++ = *src++; 272259412Sluigi *dst++ = *src++; 273259412Sluigi *dst++ = *src++; 274259412Sluigi *dst++ = *src++; 275259412Sluigi *dst++ = *src++; 276259412Sluigi *dst++ = *src++; 277259412Sluigi } 278259412Sluigi} 279259412Sluigi 280259412Sluigi 281259412Sluigi/* 282259412Sluigi * locate a bridge among the existing ones. 283259412Sluigi * MUST BE CALLED WITH NMG_LOCK() 284259412Sluigi * 285259412Sluigi * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME. 286259412Sluigi * We assume that this is called with a name of at least NM_NAME chars. 287259412Sluigi */ 288259412Sluigistatic struct nm_bridge * 289259412Sluiginm_find_bridge(const char *name, int create) 290259412Sluigi{ 291259412Sluigi int i, l, namelen; 292285349Sluigi struct nm_bridge *b = NULL, *bridges; 293285349Sluigi u_int num_bridges; 294259412Sluigi 295259412Sluigi NMG_LOCK_ASSERT(); 296259412Sluigi 297285349Sluigi netmap_bns_getbridges(&bridges, &num_bridges); 298285349Sluigi 299259412Sluigi namelen = strlen(NM_NAME); /* base length */ 300259412Sluigi l = name ? strlen(name) : 0; /* actual length */ 301259412Sluigi if (l < namelen) { 302259412Sluigi D("invalid bridge name %s", name ? 
name : NULL); 303259412Sluigi return NULL; 304259412Sluigi } 305259412Sluigi for (i = namelen + 1; i < l; i++) { 306259412Sluigi if (name[i] == ':') { 307259412Sluigi namelen = i; 308259412Sluigi break; 309259412Sluigi } 310259412Sluigi } 311259412Sluigi if (namelen >= IFNAMSIZ) 312259412Sluigi namelen = IFNAMSIZ; 313259412Sluigi ND("--- prefix is '%.*s' ---", namelen, name); 314259412Sluigi 315259412Sluigi /* lookup the name, remember empty slot if there is one */ 316285349Sluigi for (i = 0; i < num_bridges; i++) { 317285349Sluigi struct nm_bridge *x = bridges + i; 318259412Sluigi 319259412Sluigi if (x->bdg_active_ports == 0) { 320259412Sluigi if (create && b == NULL) 321259412Sluigi b = x; /* record empty slot */ 322259412Sluigi } else if (x->bdg_namelen != namelen) { 323259412Sluigi continue; 324259412Sluigi } else if (strncmp(name, x->bdg_basename, namelen) == 0) { 325259412Sluigi ND("found '%.*s' at %d", namelen, name, i); 326259412Sluigi b = x; 327259412Sluigi break; 328259412Sluigi } 329259412Sluigi } 330285349Sluigi if (i == num_bridges && b) { /* name not found, can create entry */ 331259412Sluigi /* initialize the bridge */ 332259412Sluigi strncpy(b->bdg_basename, name, namelen); 333259412Sluigi ND("create new bridge %s with ports %d", b->bdg_basename, 334259412Sluigi b->bdg_active_ports); 335259412Sluigi b->bdg_namelen = namelen; 336259412Sluigi b->bdg_active_ports = 0; 337259412Sluigi for (i = 0; i < NM_BDG_MAXPORTS; i++) 338259412Sluigi b->bdg_port_index[i] = i; 339259412Sluigi /* set the default function */ 340270063Sluigi b->bdg_ops.lookup = netmap_bdg_learning; 341259412Sluigi /* reset the MAC address table */ 342259412Sluigi bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); 343285349Sluigi NM_BNS_GET(b); 344259412Sluigi } 345259412Sluigi return b; 346259412Sluigi} 347259412Sluigi 348259412Sluigi 349259412Sluigi/* 350259412Sluigi * Free the forwarding tables for rings attached to switch ports. 
351259412Sluigi */ 352259412Sluigistatic void 353259412Sluiginm_free_bdgfwd(struct netmap_adapter *na) 354259412Sluigi{ 355259412Sluigi int nrings, i; 356259412Sluigi struct netmap_kring *kring; 357259412Sluigi 358259412Sluigi NMG_LOCK_ASSERT(); 359260368Sluigi nrings = na->num_tx_rings; 360260368Sluigi kring = na->tx_rings; 361259412Sluigi for (i = 0; i < nrings; i++) { 362259412Sluigi if (kring[i].nkr_ft) { 363259412Sluigi free(kring[i].nkr_ft, M_DEVBUF); 364259412Sluigi kring[i].nkr_ft = NULL; /* protect from freeing twice */ 365259412Sluigi } 366259412Sluigi } 367259412Sluigi} 368259412Sluigi 369259412Sluigi 370259412Sluigi/* 371259412Sluigi * Allocate the forwarding tables for the rings attached to the bridge ports. 372259412Sluigi */ 373259412Sluigistatic int 374259412Sluiginm_alloc_bdgfwd(struct netmap_adapter *na) 375259412Sluigi{ 376259412Sluigi int nrings, l, i, num_dstq; 377259412Sluigi struct netmap_kring *kring; 378259412Sluigi 379259412Sluigi NMG_LOCK_ASSERT(); 380259412Sluigi /* all port:rings + broadcast */ 381259412Sluigi num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1; 382259412Sluigi l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX; 383259412Sluigi l += sizeof(struct nm_bdg_q) * num_dstq; 384259412Sluigi l += sizeof(uint16_t) * NM_BDG_BATCH_MAX; 385259412Sluigi 386285349Sluigi nrings = netmap_real_rings(na, NR_TX); 387259412Sluigi kring = na->tx_rings; 388259412Sluigi for (i = 0; i < nrings; i++) { 389259412Sluigi struct nm_bdg_fwd *ft; 390259412Sluigi struct nm_bdg_q *dstq; 391259412Sluigi int j; 392259412Sluigi 393259412Sluigi ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO); 394259412Sluigi if (!ft) { 395259412Sluigi nm_free_bdgfwd(na); 396259412Sluigi return ENOMEM; 397259412Sluigi } 398259412Sluigi dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); 399259412Sluigi for (j = 0; j < num_dstq; j++) { 400259412Sluigi dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL; 401259412Sluigi dstq[j].bq_len = 0; 402259412Sluigi } 403259412Sluigi kring[i].nkr_ft = 
ft; 404259412Sluigi } 405259412Sluigi return 0; 406259412Sluigi} 407259412Sluigi 408259412Sluigi 409270063Sluigi/* remove from bridge b the ports in slots hw and sw 410270063Sluigi * (sw can be -1 if not needed) 411270063Sluigi */ 412259412Sluigistatic void 413259412Sluiginetmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) 414259412Sluigi{ 415259412Sluigi int s_hw = hw, s_sw = sw; 416259412Sluigi int i, lim =b->bdg_active_ports; 417259412Sluigi uint8_t tmp[NM_BDG_MAXPORTS]; 418259412Sluigi 419259412Sluigi /* 420259412Sluigi New algorithm: 421259412Sluigi make a copy of bdg_port_index; 422259412Sluigi lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port 423259412Sluigi in the array of bdg_port_index, replacing them with 424259412Sluigi entries from the bottom of the array; 425259412Sluigi decrement bdg_active_ports; 426259412Sluigi acquire BDG_WLOCK() and copy back the array. 427259412Sluigi */ 428259412Sluigi 429261909Sluigi if (netmap_verbose) 430261909Sluigi D("detach %d and %d (lim %d)", hw, sw, lim); 431259412Sluigi /* make a copy of the list of active ports, update it, 432259412Sluigi * and then copy back within BDG_WLOCK(). 
433259412Sluigi */ 434259412Sluigi memcpy(tmp, b->bdg_port_index, sizeof(tmp)); 435259412Sluigi for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) { 436259412Sluigi if (hw >= 0 && tmp[i] == hw) { 437259412Sluigi ND("detach hw %d at %d", hw, i); 438259412Sluigi lim--; /* point to last active port */ 439259412Sluigi tmp[i] = tmp[lim]; /* swap with i */ 440259412Sluigi tmp[lim] = hw; /* now this is inactive */ 441259412Sluigi hw = -1; 442259412Sluigi } else if (sw >= 0 && tmp[i] == sw) { 443259412Sluigi ND("detach sw %d at %d", sw, i); 444259412Sluigi lim--; 445259412Sluigi tmp[i] = tmp[lim]; 446259412Sluigi tmp[lim] = sw; 447259412Sluigi sw = -1; 448259412Sluigi } else { 449259412Sluigi i++; 450259412Sluigi } 451259412Sluigi } 452259412Sluigi if (hw >= 0 || sw >= 0) { 453259412Sluigi D("XXX delete failed hw %d sw %d, should panic...", hw, sw); 454259412Sluigi } 455259412Sluigi 456259412Sluigi BDG_WLOCK(b); 457270063Sluigi if (b->bdg_ops.dtor) 458270063Sluigi b->bdg_ops.dtor(b->bdg_ports[s_hw]); 459259412Sluigi b->bdg_ports[s_hw] = NULL; 460259412Sluigi if (s_sw >= 0) { 461259412Sluigi b->bdg_ports[s_sw] = NULL; 462259412Sluigi } 463259412Sluigi memcpy(b->bdg_port_index, tmp, sizeof(tmp)); 464259412Sluigi b->bdg_active_ports = lim; 465259412Sluigi BDG_WUNLOCK(b); 466259412Sluigi 467259412Sluigi ND("now %d active ports", lim); 468259412Sluigi if (lim == 0) { 469259412Sluigi ND("marking bridge %s as free", b->bdg_basename); 470270063Sluigi bzero(&b->bdg_ops, sizeof(b->bdg_ops)); 471285349Sluigi NM_BNS_PUT(b); 472259412Sluigi } 473259412Sluigi} 474259412Sluigi 475270063Sluigi/* nm_bdg_ctl callback for VALE ports */ 476270063Sluigistatic int 477270063Sluiginetmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) 478270063Sluigi{ 479270063Sluigi struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; 480270063Sluigi struct nm_bridge *b = vpna->na_bdg; 481260368Sluigi 482270063Sluigi if (attach) 483270063Sluigi return 0; /* nothing to do */ 
484270063Sluigi if (b) { 485270063Sluigi netmap_set_all_rings(na, 0 /* disable */); 486270063Sluigi netmap_bdg_detach_common(b, vpna->bdg_port, -1); 487270063Sluigi vpna->na_bdg = NULL; 488270063Sluigi netmap_set_all_rings(na, 1 /* enable */); 489270063Sluigi } 490270063Sluigi /* I have took reference just for attach */ 491270063Sluigi netmap_adapter_put(na); 492270063Sluigi return 0; 493270063Sluigi} 494270063Sluigi 495270063Sluigi/* nm_dtor callback for ephemeral VALE ports */ 496259412Sluigistatic void 497270063Sluiginetmap_vp_dtor(struct netmap_adapter *na) 498259412Sluigi{ 499259412Sluigi struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na; 500259412Sluigi struct nm_bridge *b = vpna->na_bdg; 501259412Sluigi 502270063Sluigi ND("%s has %d references", na->name, na->na_refcount); 503259412Sluigi 504259412Sluigi if (b) { 505259412Sluigi netmap_bdg_detach_common(b, vpna->bdg_port, -1); 506259412Sluigi } 507270063Sluigi} 508259412Sluigi 509270063Sluigi/* remove a persistent VALE port from the system */ 510270063Sluigistatic int 511270063Sluiginm_vi_destroy(const char *name) 512270063Sluigi{ 513270063Sluigi struct ifnet *ifp; 514270063Sluigi int error; 515260368Sluigi 516270063Sluigi ifp = ifunit_ref(name); 517270063Sluigi if (!ifp) 518270063Sluigi return ENXIO; 519270063Sluigi NMG_LOCK(); 520270063Sluigi /* make sure this is actually a VALE port */ 521270063Sluigi if (!NETMAP_CAPABLE(ifp) || NA(ifp)->nm_register != netmap_vp_reg) { 522270063Sluigi error = EINVAL; 523270063Sluigi goto err; 524270063Sluigi } 525270063Sluigi 526270063Sluigi if (NA(ifp)->na_refcount > 1) { 527270063Sluigi error = EBUSY; 528270063Sluigi goto err; 529270063Sluigi } 530270063Sluigi NMG_UNLOCK(); 531270063Sluigi 532270063Sluigi D("destroying a persistent vale interface %s", ifp->if_xname); 533270063Sluigi /* Linux requires all the references are released 534270063Sluigi * before unregister 535270063Sluigi */ 536270063Sluigi if_rele(ifp); 537270063Sluigi netmap_detach(ifp); 
538285698Sluigi nm_vi_detach(ifp); 539270063Sluigi return 0; 540270063Sluigi 541270063Sluigierr: 542270063Sluigi NMG_UNLOCK(); 543270063Sluigi if_rele(ifp); 544270063Sluigi return error; 545270063Sluigi} 546270063Sluigi 547270063Sluigi/* 548270063Sluigi * Create a virtual interface registered to the system. 549270063Sluigi * The interface will be attached to a bridge later. 550270063Sluigi */ 551270063Sluigistatic int 552270063Sluiginm_vi_create(struct nmreq *nmr) 553270063Sluigi{ 554270063Sluigi struct ifnet *ifp; 555270063Sluigi struct netmap_vp_adapter *vpna; 556270063Sluigi int error; 557270063Sluigi 558270063Sluigi /* don't include VALE prefix */ 559270063Sluigi if (!strncmp(nmr->nr_name, NM_NAME, strlen(NM_NAME))) 560270063Sluigi return EINVAL; 561270063Sluigi ifp = ifunit_ref(nmr->nr_name); 562270063Sluigi if (ifp) { /* already exist, cannot create new one */ 563270063Sluigi if_rele(ifp); 564270063Sluigi return EEXIST; 565270063Sluigi } 566270063Sluigi error = nm_vi_persist(nmr->nr_name, &ifp); 567270063Sluigi if (error) 568270063Sluigi return error; 569270063Sluigi 570270063Sluigi NMG_LOCK(); 571270063Sluigi /* netmap_vp_create creates a struct netmap_vp_adapter */ 572270063Sluigi error = netmap_vp_create(nmr, ifp, &vpna); 573270063Sluigi if (error) { 574270063Sluigi D("error %d", error); 575270063Sluigi nm_vi_detach(ifp); 576270063Sluigi return error; 577270063Sluigi } 578270063Sluigi /* persist-specific routines */ 579270063Sluigi vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl; 580270063Sluigi netmap_adapter_get(&vpna->up); 581270063Sluigi NMG_UNLOCK(); 582270063Sluigi D("created %s", ifp->if_xname); 583270063Sluigi return 0; 584270063Sluigi} 585270063Sluigi 586260368Sluigi/* Try to get a reference to a netmap adapter attached to a VALE switch. 587260368Sluigi * If the adapter is found (or is created), this function returns 0, a 588260368Sluigi * non NULL pointer is returned into *na, and the caller holds a 589260368Sluigi * reference to the adapter. 
590260368Sluigi * If an adapter is not found, then no reference is grabbed and the 591260368Sluigi * function returns an error code, or 0 if there is just a VALE prefix 592260368Sluigi * mismatch. Therefore the caller holds a reference when 593260368Sluigi * (*na != NULL && return == 0). 594260368Sluigi */ 595259412Sluigiint 596259412Sluiginetmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create) 597259412Sluigi{ 598270063Sluigi char *nr_name = nmr->nr_name; 599270063Sluigi const char *ifname; 600259412Sluigi struct ifnet *ifp; 601259412Sluigi int error = 0; 602270063Sluigi struct netmap_vp_adapter *vpna, *hostna = NULL; 603259412Sluigi struct nm_bridge *b; 604259412Sluigi int i, j, cand = -1, cand2 = -1; 605259412Sluigi int needed; 606259412Sluigi 607259412Sluigi *na = NULL; /* default return value */ 608259412Sluigi 609259412Sluigi /* first try to see if this is a bridge port. */ 610259412Sluigi NMG_LOCK_ASSERT(); 611270063Sluigi if (strncmp(nr_name, NM_NAME, sizeof(NM_NAME) - 1)) { 612259412Sluigi return 0; /* no error, but no VALE prefix */ 613259412Sluigi } 614259412Sluigi 615270063Sluigi b = nm_find_bridge(nr_name, create); 616259412Sluigi if (b == NULL) { 617270063Sluigi D("no bridges available for '%s'", nr_name); 618260700Sluigi return (create ? ENOMEM : ENXIO); 619259412Sluigi } 620270063Sluigi if (strlen(nr_name) < b->bdg_namelen) /* impossible */ 621270063Sluigi panic("x"); 622259412Sluigi 623259412Sluigi /* Now we are sure that name starts with the bridge's name, 624259412Sluigi * lookup the port in the bridge. We need to scan the entire 625259412Sluigi * list. It is not important to hold a WLOCK on the bridge 626259412Sluigi * during the search because NMG_LOCK already guarantees 627259412Sluigi * that there are no other possible writers. 
628259412Sluigi */ 629259412Sluigi 630259412Sluigi /* lookup in the local list of ports */ 631259412Sluigi for (j = 0; j < b->bdg_active_ports; j++) { 632259412Sluigi i = b->bdg_port_index[j]; 633259412Sluigi vpna = b->bdg_ports[i]; 634259412Sluigi // KASSERT(na != NULL); 635285349Sluigi ND("checking %s", vpna->up.name); 636270063Sluigi if (!strcmp(vpna->up.name, nr_name)) { 637259412Sluigi netmap_adapter_get(&vpna->up); 638270063Sluigi ND("found existing if %s refs %d", nr_name) 639270063Sluigi *na = &vpna->up; 640259412Sluigi return 0; 641259412Sluigi } 642259412Sluigi } 643259412Sluigi /* not found, should we create it? */ 644259412Sluigi if (!create) 645259412Sluigi return ENXIO; 646259412Sluigi /* yes we should, see if we have space to attach entries */ 647259412Sluigi needed = 2; /* in some cases we only need 1 */ 648259412Sluigi if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) { 649259412Sluigi D("bridge full %d, cannot create new port", b->bdg_active_ports); 650260700Sluigi return ENOMEM; 651259412Sluigi } 652259412Sluigi /* record the next two ports available, but do not allocate yet */ 653259412Sluigi cand = b->bdg_port_index[b->bdg_active_ports]; 654259412Sluigi cand2 = b->bdg_port_index[b->bdg_active_ports + 1]; 655259412Sluigi ND("+++ bridge %s port %s used %d avail %d %d", 656270063Sluigi b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2); 657259412Sluigi 658259412Sluigi /* 659259412Sluigi * try see if there is a matching NIC with this name 660259412Sluigi * (after the bridge's name) 661259412Sluigi */ 662270063Sluigi ifname = nr_name + b->bdg_namelen + 1; 663270063Sluigi ifp = ifunit_ref(ifname); 664270063Sluigi if (!ifp) { 665270063Sluigi /* Create an ephemeral virtual port 666270063Sluigi * This block contains all the ephemeral-specific logics 667270063Sluigi */ 668259412Sluigi if (nmr->nr_cmd) { 669259412Sluigi /* nr_cmd must be 0 for a virtual port */ 670259412Sluigi return EINVAL; 671259412Sluigi } 672259412Sluigi 673259412Sluigi /* 
bdg_netmap_attach creates a struct netmap_adapter */ 674270063Sluigi error = netmap_vp_create(nmr, NULL, &vpna); 675259412Sluigi if (error) { 676259412Sluigi D("error %d", error); 677259412Sluigi free(ifp, M_DEVBUF); 678259412Sluigi return error; 679259412Sluigi } 680270063Sluigi /* shortcut - we can skip get_hw_na(), 681270063Sluigi * ownership check and nm_bdg_attach() 682270063Sluigi */ 683270063Sluigi } else { 684270063Sluigi struct netmap_adapter *hw; 685259412Sluigi 686270063Sluigi error = netmap_get_hw_na(ifp, &hw); 687270063Sluigi if (error || hw == NULL) 688259412Sluigi goto out; 689259412Sluigi 690270063Sluigi /* host adapter might not be created */ 691270063Sluigi error = hw->nm_bdg_attach(nr_name, hw); 692270063Sluigi if (error) 693259412Sluigi goto out; 694270063Sluigi vpna = hw->na_vp; 695270063Sluigi hostna = hw->na_hostvp; 696270063Sluigi if_rele(ifp); 697259412Sluigi if (nmr->nr_arg1 != NETMAP_BDG_HOST) 698270063Sluigi hostna = NULL; 699259412Sluigi } 700259412Sluigi 701259412Sluigi BDG_WLOCK(b); 702259412Sluigi vpna->bdg_port = cand; 703259412Sluigi ND("NIC %p to bridge port %d", vpna, cand); 704259412Sluigi /* bind the port to the bridge (virtual ports are not active) */ 705259412Sluigi b->bdg_ports[cand] = vpna; 706259412Sluigi vpna->na_bdg = b; 707259412Sluigi b->bdg_active_ports++; 708270063Sluigi if (hostna != NULL) { 709259412Sluigi /* also bind the host stack to the bridge */ 710259412Sluigi b->bdg_ports[cand2] = hostna; 711259412Sluigi hostna->bdg_port = cand2; 712259412Sluigi hostna->na_bdg = b; 713259412Sluigi b->bdg_active_ports++; 714259412Sluigi ND("host %p to bridge port %d", hostna, cand2); 715259412Sluigi } 716270063Sluigi ND("if %s refs %d", ifname, vpna->up.na_refcount); 717259412Sluigi BDG_WUNLOCK(b); 718270063Sluigi *na = &vpna->up; 719270063Sluigi netmap_adapter_get(*na); 720259412Sluigi return 0; 721259412Sluigi 722259412Sluigiout: 723259412Sluigi if_rele(ifp); 724259412Sluigi 725259412Sluigi return error; 726259412Sluigi} 
727259412Sluigi 728259412Sluigi 729270063Sluigi/* Process NETMAP_BDG_ATTACH */ 730259412Sluigistatic int 731270063Sluiginm_bdg_ctl_attach(struct nmreq *nmr) 732259412Sluigi{ 733259412Sluigi struct netmap_adapter *na; 734259412Sluigi int error; 735259412Sluigi 736259412Sluigi NMG_LOCK(); 737260700Sluigi 738260368Sluigi error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */); 739270063Sluigi if (error) /* no device */ 740259412Sluigi goto unlock_exit; 741260700Sluigi 742260368Sluigi if (na == NULL) { /* VALE prefix missing */ 743259412Sluigi error = EINVAL; 744260368Sluigi goto unlock_exit; 745259412Sluigi } 746259412Sluigi 747270063Sluigi if (NETMAP_OWNED_BY_ANY(na)) { 748259412Sluigi error = EBUSY; 749259412Sluigi goto unref_exit; 750259412Sluigi } 751259412Sluigi 752270063Sluigi if (na->nm_bdg_ctl) { 753270063Sluigi /* nop for VALE ports. The bwrap needs to put the hwna 754270063Sluigi * in netmap mode (see netmap_bwrap_bdg_ctl) 755270063Sluigi */ 756270063Sluigi error = na->nm_bdg_ctl(na, nmr, 1); 757270063Sluigi if (error) 758270063Sluigi goto unref_exit; 759270063Sluigi ND("registered %s to netmap-mode", na->name); 760259412Sluigi } 761259412Sluigi NMG_UNLOCK(); 762259412Sluigi return 0; 763259412Sluigi 764259412Sluigiunref_exit: 765259412Sluigi netmap_adapter_put(na); 766259412Sluigiunlock_exit: 767259412Sluigi NMG_UNLOCK(); 768259412Sluigi return error; 769259412Sluigi} 770259412Sluigi 771260368Sluigi 772270063Sluigi/* process NETMAP_BDG_DETACH */ 773259412Sluigistatic int 774270063Sluiginm_bdg_ctl_detach(struct nmreq *nmr) 775259412Sluigi{ 776259412Sluigi struct netmap_adapter *na; 777259412Sluigi int error; 778259412Sluigi 779259412Sluigi NMG_LOCK(); 780260368Sluigi error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */); 781259412Sluigi if (error) { /* no device, or another bridge or user owns the device */ 782259412Sluigi goto unlock_exit; 783259412Sluigi } 784260700Sluigi 785260368Sluigi if (na == NULL) { /* VALE prefix missing */ 
786259412Sluigi error = EINVAL; 787260368Sluigi goto unlock_exit; 788259412Sluigi } 789260368Sluigi 790270063Sluigi if (na->nm_bdg_ctl) { 791270063Sluigi /* remove the port from bridge. The bwrap 792270063Sluigi * also needs to put the hwna in normal mode 793270063Sluigi */ 794270063Sluigi error = na->nm_bdg_ctl(na, nmr, 0); 795259412Sluigi } 796259412Sluigi 797259412Sluigi netmap_adapter_put(na); 798259412Sluigiunlock_exit: 799259412Sluigi NMG_UNLOCK(); 800259412Sluigi return error; 801259412Sluigi 802259412Sluigi} 803259412Sluigi 804259412Sluigi 805270063Sluigi/* Called by either user's context (netmap_ioctl()) 806270063Sluigi * or external kernel modules (e.g., Openvswitch). 807270063Sluigi * Operation is indicated in nmr->nr_cmd. 808270063Sluigi * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge 809270063Sluigi * requires bdg_ops argument; the other commands ignore this argument. 810270063Sluigi * 811259412Sluigi * Called without NMG_LOCK. 812259412Sluigi */ 813259412Sluigiint 814270063Sluiginetmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops) 815259412Sluigi{ 816285349Sluigi struct nm_bridge *b, *bridges; 817259412Sluigi struct netmap_adapter *na; 818259412Sluigi struct netmap_vp_adapter *vpna; 819259412Sluigi char *name = nmr->nr_name; 820259412Sluigi int cmd = nmr->nr_cmd, namelen = strlen(name); 821259412Sluigi int error = 0, i, j; 822285349Sluigi u_int num_bridges; 823259412Sluigi 824285349Sluigi netmap_bns_getbridges(&bridges, &num_bridges); 825285349Sluigi 826259412Sluigi switch (cmd) { 827270063Sluigi case NETMAP_BDG_NEWIF: 828270063Sluigi error = nm_vi_create(nmr); 829270063Sluigi break; 830270063Sluigi 831270063Sluigi case NETMAP_BDG_DELIF: 832270063Sluigi error = nm_vi_destroy(nmr->nr_name); 833270063Sluigi break; 834270063Sluigi 835259412Sluigi case NETMAP_BDG_ATTACH: 836270063Sluigi error = nm_bdg_ctl_attach(nmr); 837259412Sluigi break; 838259412Sluigi 839259412Sluigi case NETMAP_BDG_DETACH: 840270063Sluigi error 
= nm_bdg_ctl_detach(nmr); 841259412Sluigi break; 842259412Sluigi 843259412Sluigi case NETMAP_BDG_LIST: 844259412Sluigi /* this is used to enumerate bridges and ports */ 845259412Sluigi if (namelen) { /* look up indexes of bridge and port */ 846259412Sluigi if (strncmp(name, NM_NAME, strlen(NM_NAME))) { 847259412Sluigi error = EINVAL; 848259412Sluigi break; 849259412Sluigi } 850259412Sluigi NMG_LOCK(); 851259412Sluigi b = nm_find_bridge(name, 0 /* don't create */); 852259412Sluigi if (!b) { 853259412Sluigi error = ENOENT; 854259412Sluigi NMG_UNLOCK(); 855259412Sluigi break; 856259412Sluigi } 857259412Sluigi 858259412Sluigi error = ENOENT; 859259412Sluigi for (j = 0; j < b->bdg_active_ports; j++) { 860259412Sluigi i = b->bdg_port_index[j]; 861259412Sluigi vpna = b->bdg_ports[i]; 862259412Sluigi if (vpna == NULL) { 863259412Sluigi D("---AAAAAAAAARGH-------"); 864259412Sluigi continue; 865259412Sluigi } 866259412Sluigi /* the former and the latter identify a 867259412Sluigi * virtual port and a NIC, respectively 868259412Sluigi */ 869270063Sluigi if (!strcmp(vpna->up.name, name)) { 870259412Sluigi /* bridge index */ 871285349Sluigi nmr->nr_arg1 = b - bridges; 872259412Sluigi nmr->nr_arg2 = i; /* port index */ 873259412Sluigi error = 0; 874259412Sluigi break; 875259412Sluigi } 876259412Sluigi } 877259412Sluigi NMG_UNLOCK(); 878259412Sluigi } else { 879259412Sluigi /* return the first non-empty entry starting from 880259412Sluigi * bridge nr_arg1 and port nr_arg2. 
881259412Sluigi * 882259412Sluigi * Users can detect the end of the same bridge by 883259412Sluigi * seeing the new and old value of nr_arg1, and can 884259412Sluigi * detect the end of all the bridge by error != 0 885259412Sluigi */ 886259412Sluigi i = nmr->nr_arg1; 887259412Sluigi j = nmr->nr_arg2; 888259412Sluigi 889259412Sluigi NMG_LOCK(); 890259412Sluigi for (error = ENOENT; i < NM_BRIDGES; i++) { 891285349Sluigi b = bridges + i; 892259412Sluigi if (j >= b->bdg_active_ports) { 893259412Sluigi j = 0; /* following bridges scan from 0 */ 894259412Sluigi continue; 895259412Sluigi } 896259412Sluigi nmr->nr_arg1 = i; 897259412Sluigi nmr->nr_arg2 = j; 898259412Sluigi j = b->bdg_port_index[j]; 899259412Sluigi vpna = b->bdg_ports[j]; 900270063Sluigi strncpy(name, vpna->up.name, (size_t)IFNAMSIZ); 901259412Sluigi error = 0; 902259412Sluigi break; 903259412Sluigi } 904259412Sluigi NMG_UNLOCK(); 905259412Sluigi } 906259412Sluigi break; 907259412Sluigi 908270063Sluigi case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */ 909270063Sluigi /* register callbacks to the given bridge. 910259412Sluigi * nmr->nr_name may be just bridge's name (including ':' 911259412Sluigi * if it is not just NM_NAME). 912259412Sluigi */ 913270063Sluigi if (!bdg_ops) { 914259412Sluigi error = EINVAL; 915259412Sluigi break; 916259412Sluigi } 917259412Sluigi NMG_LOCK(); 918259412Sluigi b = nm_find_bridge(name, 0 /* don't create */); 919259412Sluigi if (!b) { 920259412Sluigi error = EINVAL; 921259412Sluigi } else { 922270063Sluigi b->bdg_ops = *bdg_ops; 923259412Sluigi } 924259412Sluigi NMG_UNLOCK(); 925259412Sluigi break; 926259412Sluigi 927261909Sluigi case NETMAP_BDG_VNET_HDR: 928261909Sluigi /* Valid lengths for the virtio-net header are 0 (no header), 929261909Sluigi 10 and 12. 
*/ 930261909Sluigi if (nmr->nr_arg1 != 0 && 931261909Sluigi nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) && 932261909Sluigi nmr->nr_arg1 != 12) { 933261909Sluigi error = EINVAL; 934261909Sluigi break; 935261909Sluigi } 936259412Sluigi NMG_LOCK(); 937259412Sluigi error = netmap_get_bdg_na(nmr, &na, 0); 938260368Sluigi if (na && !error) { 939259412Sluigi vpna = (struct netmap_vp_adapter *)na; 940261909Sluigi vpna->virt_hdr_len = nmr->nr_arg1; 941261909Sluigi if (vpna->virt_hdr_len) 942270063Sluigi vpna->mfs = NETMAP_BUF_SIZE(na); 943261909Sluigi D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna); 944260368Sluigi netmap_adapter_put(na); 945259412Sluigi } 946259412Sluigi NMG_UNLOCK(); 947259412Sluigi break; 948259412Sluigi 949259412Sluigi default: 950259412Sluigi D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd); 951259412Sluigi error = EINVAL; 952259412Sluigi break; 953259412Sluigi } 954259412Sluigi return error; 955259412Sluigi} 956259412Sluigi 957270063Sluigiint 958270063Sluiginetmap_bdg_config(struct nmreq *nmr) 959270063Sluigi{ 960270063Sluigi struct nm_bridge *b; 961270063Sluigi int error = EINVAL; 962270063Sluigi 963270063Sluigi NMG_LOCK(); 964270063Sluigi b = nm_find_bridge(nmr->nr_name, 0); 965270063Sluigi if (!b) { 966270063Sluigi NMG_UNLOCK(); 967270063Sluigi return error; 968270063Sluigi } 969270063Sluigi NMG_UNLOCK(); 970270063Sluigi /* Don't call config() with NMG_LOCK() held */ 971270063Sluigi BDG_RLOCK(b); 972270063Sluigi if (b->bdg_ops.config != NULL) 973270063Sluigi error = b->bdg_ops.config((struct nm_ifreq *)nmr); 974270063Sluigi BDG_RUNLOCK(b); 975270063Sluigi return error; 976270063Sluigi} 977270063Sluigi 978270063Sluigi 979270063Sluigi/* nm_krings_create callback for VALE ports. 980270063Sluigi * Calls the standard netmap_krings_create, then adds leases on rx 981270063Sluigi * rings and bdgfwd on tx rings. 
982270063Sluigi */ 983259412Sluigistatic int 984259412Sluiginetmap_vp_krings_create(struct netmap_adapter *na) 985259412Sluigi{ 986261909Sluigi u_int tailroom; 987259412Sluigi int error, i; 988259412Sluigi uint32_t *leases; 989285349Sluigi u_int nrx = netmap_real_rings(na, NR_RX); 990259412Sluigi 991259412Sluigi /* 992259412Sluigi * Leases are attached to RX rings on vale ports 993259412Sluigi */ 994259412Sluigi tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx; 995259412Sluigi 996261909Sluigi error = netmap_krings_create(na, tailroom); 997259412Sluigi if (error) 998259412Sluigi return error; 999259412Sluigi 1000259412Sluigi leases = na->tailroom; 1001259412Sluigi 1002259412Sluigi for (i = 0; i < nrx; i++) { /* Receive rings */ 1003259412Sluigi na->rx_rings[i].nkr_leases = leases; 1004259412Sluigi leases += na->num_rx_desc; 1005259412Sluigi } 1006259412Sluigi 1007259412Sluigi error = nm_alloc_bdgfwd(na); 1008259412Sluigi if (error) { 1009259412Sluigi netmap_krings_delete(na); 1010259412Sluigi return error; 1011259412Sluigi } 1012259412Sluigi 1013259412Sluigi return 0; 1014259412Sluigi} 1015259412Sluigi 1016260368Sluigi 1017270063Sluigi/* nm_krings_delete callback for VALE ports. */ 1018259412Sluigistatic void 1019259412Sluiginetmap_vp_krings_delete(struct netmap_adapter *na) 1020259412Sluigi{ 1021259412Sluigi nm_free_bdgfwd(na); 1022259412Sluigi netmap_krings_delete(na); 1023259412Sluigi} 1024259412Sluigi 1025259412Sluigi 1026259412Sluigistatic int 1027259412Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, 1028259412Sluigi struct netmap_vp_adapter *na, u_int ring_nr); 1029259412Sluigi 1030259412Sluigi 1031259412Sluigi/* 1032270063Sluigi * main dispatch routine for the bridge. 1033259412Sluigi * Grab packets from a kring, move them into the ft structure 1034259412Sluigi * associated to the tx (input) port. Max one instance per port, 1035259412Sluigi * filtered on input (ioctl, poll or XXX). 1036259412Sluigi * Returns the next position in the ring. 
1037259412Sluigi */ 1038259412Sluigistatic int 1039270063Sluiginm_bdg_preflush(struct netmap_kring *kring, u_int end) 1040259412Sluigi{ 1041270063Sluigi struct netmap_vp_adapter *na = 1042270063Sluigi (struct netmap_vp_adapter*)kring->na; 1043259412Sluigi struct netmap_ring *ring = kring->ring; 1044259412Sluigi struct nm_bdg_fwd *ft; 1045270063Sluigi u_int ring_nr = kring->ring_id; 1046259412Sluigi u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1; 1047259412Sluigi u_int ft_i = 0; /* start from 0 */ 1048259412Sluigi u_int frags = 1; /* how many frags ? */ 1049259412Sluigi struct nm_bridge *b = na->na_bdg; 1050259412Sluigi 1051259412Sluigi /* To protect against modifications to the bridge we acquire a 1052259412Sluigi * shared lock, waiting if we can sleep (if the source port is 1053259412Sluigi * attached to a user process) or with a trylock otherwise (NICs). 1054259412Sluigi */ 1055259412Sluigi ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j); 1056259412Sluigi if (na->up.na_flags & NAF_BDG_MAYSLEEP) 1057259412Sluigi BDG_RLOCK(b); 1058259412Sluigi else if (!BDG_RTRYLOCK(b)) 1059259412Sluigi return 0; 1060259412Sluigi ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j); 1061259412Sluigi ft = kring->nkr_ft; 1062259412Sluigi 1063259412Sluigi for (; likely(j != end); j = nm_next(j, lim)) { 1064259412Sluigi struct netmap_slot *slot = &ring->slot[j]; 1065259412Sluigi char *buf; 1066259412Sluigi 1067259412Sluigi ft[ft_i].ft_len = slot->len; 1068259412Sluigi ft[ft_i].ft_flags = slot->flags; 1069259412Sluigi 1070259412Sluigi ND("flags is 0x%x", slot->flags); 1071285349Sluigi /* we do not use the buf changed flag, but we still need to reset it */ 1072285349Sluigi slot->flags &= ~NS_BUF_CHANGED; 1073285349Sluigi 1074259412Sluigi /* this slot goes into a list so initialize the link field */ 1075259412Sluigi ft[ft_i].ft_next = NM_FT_NULL; 1076259412Sluigi buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ? 
1077270063Sluigi (void *)(uintptr_t)slot->ptr : NMB(&na->up, slot); 1078267151Sluigi if (unlikely(buf == NULL)) { 1079267151Sluigi RD(5, "NULL %s buffer pointer from %s slot %d len %d", 1080267151Sluigi (slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT", 1081267151Sluigi kring->name, j, ft[ft_i].ft_len); 1082270063Sluigi buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up); 1083267151Sluigi ft[ft_i].ft_len = 0; 1084267151Sluigi ft[ft_i].ft_flags = 0; 1085267151Sluigi } 1086259487Sluigi __builtin_prefetch(buf); 1087259412Sluigi ++ft_i; 1088259412Sluigi if (slot->flags & NS_MOREFRAG) { 1089259412Sluigi frags++; 1090259412Sluigi continue; 1091259412Sluigi } 1092259412Sluigi if (unlikely(netmap_verbose && frags > 1)) 1093259412Sluigi RD(5, "%d frags at %d", frags, ft_i - frags); 1094259412Sluigi ft[ft_i - frags].ft_frags = frags; 1095259412Sluigi frags = 1; 1096259412Sluigi if (unlikely((int)ft_i >= bridge_batch)) 1097259412Sluigi ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); 1098259412Sluigi } 1099259412Sluigi if (frags > 1) { 1100259412Sluigi D("truncate incomplete fragment at %d (%d frags)", ft_i, frags); 1101259412Sluigi // ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG 1102259412Sluigi ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG; 1103259412Sluigi ft[ft_i - frags].ft_frags = frags - 1; 1104259412Sluigi } 1105259412Sluigi if (ft_i) 1106259412Sluigi ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); 1107259412Sluigi BDG_RUNLOCK(b); 1108259412Sluigi return j; 1109259412Sluigi} 1110259412Sluigi 1111259412Sluigi 1112259412Sluigi/* ----- FreeBSD if_bridge hash function ------- */ 1113259412Sluigi 1114259412Sluigi/* 1115259412Sluigi * The following hash function is adapted from "Hash Functions" by Bob Jenkins 1116259412Sluigi * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). 
1117259412Sluigi * 1118259412Sluigi * http://www.burtleburtle.net/bob/hash/spooky.html 1119259412Sluigi */ 1120259412Sluigi#define mix(a, b, c) \ 1121259412Sluigido { \ 1122259412Sluigi a -= b; a -= c; a ^= (c >> 13); \ 1123259412Sluigi b -= c; b -= a; b ^= (a << 8); \ 1124259412Sluigi c -= a; c -= b; c ^= (b >> 13); \ 1125259412Sluigi a -= b; a -= c; a ^= (c >> 12); \ 1126259412Sluigi b -= c; b -= a; b ^= (a << 16); \ 1127259412Sluigi c -= a; c -= b; c ^= (b >> 5); \ 1128259412Sluigi a -= b; a -= c; a ^= (c >> 3); \ 1129259412Sluigi b -= c; b -= a; b ^= (a << 10); \ 1130259412Sluigi c -= a; c -= b; c ^= (b >> 15); \ 1131259412Sluigi} while (/*CONSTCOND*/0) 1132259412Sluigi 1133260368Sluigi 1134259412Sluigistatic __inline uint32_t 1135259412Sluiginm_bridge_rthash(const uint8_t *addr) 1136259412Sluigi{ 1137259412Sluigi uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key 1138259412Sluigi 1139259412Sluigi b += addr[5] << 8; 1140259412Sluigi b += addr[4]; 1141259412Sluigi a += addr[3] << 24; 1142259412Sluigi a += addr[2] << 16; 1143259412Sluigi a += addr[1] << 8; 1144259412Sluigi a += addr[0]; 1145259412Sluigi 1146259412Sluigi mix(a, b, c); 1147259412Sluigi#define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1) 1148259412Sluigi return (c & BRIDGE_RTHASH_MASK); 1149259412Sluigi} 1150259412Sluigi 1151259412Sluigi#undef mix 1152259412Sluigi 1153259412Sluigi 1154270063Sluigi/* nm_register callback for VALE ports */ 1155259412Sluigistatic int 1156270063Sluiginetmap_vp_reg(struct netmap_adapter *na, int onoff) 1157259412Sluigi{ 1158259412Sluigi struct netmap_vp_adapter *vpna = 1159259412Sluigi (struct netmap_vp_adapter*)na; 1160259412Sluigi 1161270063Sluigi /* persistent ports may be put in netmap mode 1162270063Sluigi * before being attached to a bridge 1163259412Sluigi */ 1164270063Sluigi if (vpna->na_bdg) 1165270063Sluigi BDG_WLOCK(vpna->na_bdg); 1166259412Sluigi if (onoff) { 1167270063Sluigi na->na_flags |= NAF_NETMAP_ON; 1168270063Sluigi /* XXX on FreeBSD, persistent VALE 
ports should also 1169270063Sluigi * toggle IFCAP_NETMAP in na->ifp (2014-03-16) 1170270063Sluigi */ 1171259412Sluigi } else { 1172270063Sluigi na->na_flags &= ~NAF_NETMAP_ON; 1173259412Sluigi } 1174270063Sluigi if (vpna->na_bdg) 1175270063Sluigi BDG_WUNLOCK(vpna->na_bdg); 1176259412Sluigi return 0; 1177259412Sluigi} 1178259412Sluigi 1179259412Sluigi 1180259412Sluigi/* 1181259412Sluigi * Lookup function for a learning bridge. 1182259412Sluigi * Update the hash table with the source address, 1183259412Sluigi * and then returns the destination port index, and the 1184259412Sluigi * ring in *dst_ring (at the moment, always use ring 0) 1185259412Sluigi */ 1186259412Sluigiu_int 1187270063Sluiginetmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, 1188285349Sluigi struct netmap_vp_adapter *na) 1189259412Sluigi{ 1190270063Sluigi uint8_t *buf = ft->ft_buf; 1191270063Sluigi u_int buf_len = ft->ft_len; 1192259412Sluigi struct nm_hash_ent *ht = na->na_bdg->ht; 1193259412Sluigi uint32_t sh, dh; 1194259412Sluigi u_int dst, mysrc = na->bdg_port; 1195259412Sluigi uint64_t smac, dmac; 1196259412Sluigi 1197270063Sluigi /* safety check, unfortunately we have many cases */ 1198270063Sluigi if (buf_len >= 14 + na->virt_hdr_len) { 1199270063Sluigi /* virthdr + mac_hdr in the same slot */ 1200270063Sluigi buf += na->virt_hdr_len; 1201270063Sluigi buf_len -= na->virt_hdr_len; 1202270063Sluigi } else if (buf_len == na->virt_hdr_len && ft->ft_flags & NS_MOREFRAG) { 1203270063Sluigi /* only header in first fragment */ 1204270063Sluigi ft++; 1205270063Sluigi buf = ft->ft_buf; 1206270063Sluigi buf_len = ft->ft_len; 1207270063Sluigi } else { 1208270063Sluigi RD(5, "invalid buf format, length %d", buf_len); 1209259412Sluigi return NM_BDG_NOPORT; 1210259412Sluigi } 1211259412Sluigi dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff; 1212259412Sluigi smac = le64toh(*(uint64_t *)(buf + 4)); 1213259412Sluigi smac >>= 16; 1214259412Sluigi 1215259412Sluigi /* 1216259412Sluigi * The hash is 
somewhat expensive, there might be some 1217259412Sluigi * worthwhile optimizations here. 1218259412Sluigi */ 1219285349Sluigi if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */ 1220259412Sluigi uint8_t *s = buf+6; 1221259412Sluigi sh = nm_bridge_rthash(s); // XXX hash of source 1222259412Sluigi /* update source port forwarding entry */ 1223285349Sluigi na->last_smac = ht[sh].mac = smac; /* XXX expire ? */ 1224259412Sluigi ht[sh].ports = mysrc; 1225259412Sluigi if (netmap_verbose) 1226259412Sluigi D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d", 1227259412Sluigi s[0], s[1], s[2], s[3], s[4], s[5], mysrc); 1228259412Sluigi } 1229259412Sluigi dst = NM_BDG_BROADCAST; 1230259412Sluigi if ((buf[0] & 1) == 0) { /* unicast */ 1231259412Sluigi dh = nm_bridge_rthash(buf); // XXX hash of dst 1232259412Sluigi if (ht[dh].mac == dmac) { /* found dst */ 1233259412Sluigi dst = ht[dh].ports; 1234259412Sluigi } 1235259412Sluigi /* XXX otherwise return NM_BDG_UNKNOWN ? */ 1236259412Sluigi } 1237259412Sluigi return dst; 1238259412Sluigi} 1239259412Sluigi 1240259412Sluigi 1241259412Sluigi/* 1242260368Sluigi * Available space in the ring. 
Only used in VALE code 1243260368Sluigi * and only with is_rx = 1 1244260368Sluigi */ 1245260368Sluigistatic inline uint32_t 1246260368Sluiginm_kr_space(struct netmap_kring *k, int is_rx) 1247260368Sluigi{ 1248260368Sluigi int space; 1249260368Sluigi 1250260368Sluigi if (is_rx) { 1251260368Sluigi int busy = k->nkr_hwlease - k->nr_hwcur; 1252260368Sluigi if (busy < 0) 1253260368Sluigi busy += k->nkr_num_slots; 1254260368Sluigi space = k->nkr_num_slots - 1 - busy; 1255260368Sluigi } else { 1256260368Sluigi /* XXX never used in this branch */ 1257260368Sluigi space = k->nr_hwtail - k->nkr_hwlease; 1258260368Sluigi if (space < 0) 1259260368Sluigi space += k->nkr_num_slots; 1260260368Sluigi } 1261260368Sluigi#if 0 1262260368Sluigi // sanity check 1263260368Sluigi if (k->nkr_hwlease >= k->nkr_num_slots || 1264260368Sluigi k->nr_hwcur >= k->nkr_num_slots || 1265260368Sluigi k->nr_tail >= k->nkr_num_slots || 1266260368Sluigi busy < 0 || 1267260368Sluigi busy >= k->nkr_num_slots) { 1268260368Sluigi D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 1269260368Sluigi k->nkr_lease_idx, k->nkr_num_slots); 1270260368Sluigi } 1271260368Sluigi#endif 1272260368Sluigi return space; 1273260368Sluigi} 1274260368Sluigi 1275260368Sluigi 1276260368Sluigi 1277260368Sluigi 1278260368Sluigi/* make a lease on the kring for N positions. 
return the 1279260368Sluigi * lease index 1280260368Sluigi * XXX only used in VALE code and with is_rx = 1 1281260368Sluigi */ 1282260368Sluigistatic inline uint32_t 1283260368Sluiginm_kr_lease(struct netmap_kring *k, u_int n, int is_rx) 1284260368Sluigi{ 1285260368Sluigi uint32_t lim = k->nkr_num_slots - 1; 1286260368Sluigi uint32_t lease_idx = k->nkr_lease_idx; 1287260368Sluigi 1288260368Sluigi k->nkr_leases[lease_idx] = NR_NOSLOT; 1289260368Sluigi k->nkr_lease_idx = nm_next(lease_idx, lim); 1290260368Sluigi 1291260368Sluigi if (n > nm_kr_space(k, is_rx)) { 1292260368Sluigi D("invalid request for %d slots", n); 1293260368Sluigi panic("x"); 1294260368Sluigi } 1295260368Sluigi /* XXX verify that there are n slots */ 1296260368Sluigi k->nkr_hwlease += n; 1297260368Sluigi if (k->nkr_hwlease > lim) 1298260368Sluigi k->nkr_hwlease -= lim + 1; 1299260368Sluigi 1300260368Sluigi if (k->nkr_hwlease >= k->nkr_num_slots || 1301260368Sluigi k->nr_hwcur >= k->nkr_num_slots || 1302260368Sluigi k->nr_hwtail >= k->nkr_num_slots || 1303260368Sluigi k->nkr_lease_idx >= k->nkr_num_slots) { 1304260368Sluigi D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d", 1305270063Sluigi k->na->name, 1306260368Sluigi k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 1307260368Sluigi k->nkr_lease_idx, k->nkr_num_slots); 1308260368Sluigi } 1309260368Sluigi return lease_idx; 1310260368Sluigi} 1311260368Sluigi 1312260368Sluigi/* 1313270063Sluigi * 1314259412Sluigi * This flush routine supports only unicast and broadcast but a large 1315259412Sluigi * number of ports, and lets us replace the learn and dispatch functions. 
1316259412Sluigi */ 1317259412Sluigiint 1318259412Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, 1319259412Sluigi u_int ring_nr) 1320259412Sluigi{ 1321259412Sluigi struct nm_bdg_q *dst_ents, *brddst; 1322259412Sluigi uint16_t num_dsts = 0, *dsts; 1323259412Sluigi struct nm_bridge *b = na->na_bdg; 1324259412Sluigi u_int i, j, me = na->bdg_port; 1325259412Sluigi 1326259412Sluigi /* 1327259412Sluigi * The work area (pointed by ft) is followed by an array of 1328259412Sluigi * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS 1329259412Sluigi * queues per port plus one for the broadcast traffic. 1330259412Sluigi * Then we have an array of destination indexes. 1331259412Sluigi */ 1332259412Sluigi dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); 1333259412Sluigi dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1); 1334259412Sluigi 1335259412Sluigi /* first pass: find a destination for each packet in the batch */ 1336259412Sluigi for (i = 0; likely(i < n); i += ft[i].ft_frags) { 1337259412Sluigi uint8_t dst_ring = ring_nr; /* default, same ring as origin */ 1338259412Sluigi uint16_t dst_port, d_i; 1339259412Sluigi struct nm_bdg_q *d; 1340259412Sluigi 1341259412Sluigi ND("slot %d frags %d", i, ft[i].ft_frags); 1342261909Sluigi /* Drop the packet if the virtio-net header is not into the first 1343259412Sluigi fragment nor at the very beginning of the second. 
*/ 1344270063Sluigi if (unlikely(na->virt_hdr_len > ft[i].ft_len)) 1345259412Sluigi continue; 1346270063Sluigi dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na); 1347259412Sluigi if (netmap_verbose > 255) 1348259412Sluigi RD(5, "slot %d port %d -> %d", i, me, dst_port); 1349259412Sluigi if (dst_port == NM_BDG_NOPORT) 1350259412Sluigi continue; /* this packet is identified to be dropped */ 1351259412Sluigi else if (unlikely(dst_port > NM_BDG_MAXPORTS)) 1352259412Sluigi continue; 1353259412Sluigi else if (dst_port == NM_BDG_BROADCAST) 1354259412Sluigi dst_ring = 0; /* broadcasts always go to ring 0 */ 1355259412Sluigi else if (unlikely(dst_port == me || 1356259412Sluigi !b->bdg_ports[dst_port])) 1357259412Sluigi continue; 1358259412Sluigi 1359259412Sluigi /* get a position in the scratch pad */ 1360259412Sluigi d_i = dst_port * NM_BDG_MAXRINGS + dst_ring; 1361259412Sluigi d = dst_ents + d_i; 1362259412Sluigi 1363259412Sluigi /* append the first fragment to the list */ 1364259412Sluigi if (d->bq_head == NM_FT_NULL) { /* new destination */ 1365259412Sluigi d->bq_head = d->bq_tail = i; 1366259412Sluigi /* remember this position to be scanned later */ 1367259412Sluigi if (dst_port != NM_BDG_BROADCAST) 1368259412Sluigi dsts[num_dsts++] = d_i; 1369259412Sluigi } else { 1370259412Sluigi ft[d->bq_tail].ft_next = i; 1371259412Sluigi d->bq_tail = i; 1372259412Sluigi } 1373259412Sluigi d->bq_len += ft[i].ft_frags; 1374259412Sluigi } 1375259412Sluigi 1376259412Sluigi /* 1377259412Sluigi * Broadcast traffic goes to ring 0 on all destinations. 1378259412Sluigi * So we need to add these rings to the list of ports to scan. 1379259412Sluigi * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is 1380259412Sluigi * expensive. We should keep a compact list of active destinations 1381259412Sluigi * so we could shorten this loop. 
1382259412Sluigi */ 1383259412Sluigi brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS; 1384259412Sluigi if (brddst->bq_head != NM_FT_NULL) { 1385259412Sluigi for (j = 0; likely(j < b->bdg_active_ports); j++) { 1386259412Sluigi uint16_t d_i; 1387259412Sluigi i = b->bdg_port_index[j]; 1388259412Sluigi if (unlikely(i == me)) 1389259412Sluigi continue; 1390259412Sluigi d_i = i * NM_BDG_MAXRINGS; 1391259412Sluigi if (dst_ents[d_i].bq_head == NM_FT_NULL) 1392259412Sluigi dsts[num_dsts++] = d_i; 1393259412Sluigi } 1394259412Sluigi } 1395259412Sluigi 1396259412Sluigi ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts); 1397270063Sluigi /* second pass: scan destinations */ 1398259412Sluigi for (i = 0; i < num_dsts; i++) { 1399259412Sluigi struct netmap_vp_adapter *dst_na; 1400259412Sluigi struct netmap_kring *kring; 1401259412Sluigi struct netmap_ring *ring; 1402261909Sluigi u_int dst_nr, lim, j, d_i, next, brd_next; 1403259412Sluigi u_int needed, howmany; 1404259412Sluigi int retry = netmap_txsync_retry; 1405259412Sluigi struct nm_bdg_q *d; 1406259412Sluigi uint32_t my_start = 0, lease_idx = 0; 1407259412Sluigi int nrings; 1408261909Sluigi int virt_hdr_mismatch = 0; 1409259412Sluigi 1410259412Sluigi d_i = dsts[i]; 1411259412Sluigi ND("second pass %d port %d", i, d_i); 1412259412Sluigi d = dst_ents + d_i; 1413259412Sluigi // XXX fix the division 1414259412Sluigi dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS]; 1415259412Sluigi /* protect from the lookup function returning an inactive 1416259412Sluigi * destination port 1417259412Sluigi */ 1418259412Sluigi if (unlikely(dst_na == NULL)) 1419259412Sluigi goto cleanup; 1420259412Sluigi if (dst_na->up.na_flags & NAF_SW_ONLY) 1421259412Sluigi goto cleanup; 1422259412Sluigi /* 1423259412Sluigi * The interface may be in !netmap mode in two cases: 1424259412Sluigi * - when na is attached but not activated yet; 1425259412Sluigi * - when na is being deactivated but is still attached. 
1426259412Sluigi */ 1427270063Sluigi if (unlikely(!nm_netmap_on(&dst_na->up))) { 1428259412Sluigi ND("not in netmap mode!"); 1429259412Sluigi goto cleanup; 1430259412Sluigi } 1431259412Sluigi 1432259412Sluigi /* there is at least one either unicast or broadcast packet */ 1433259412Sluigi brd_next = brddst->bq_head; 1434259412Sluigi next = d->bq_head; 1435259412Sluigi /* we need to reserve this many slots. If fewer are 1436259412Sluigi * available, some packets will be dropped. 1437259412Sluigi * Packets may have multiple fragments, so we may not use 1438259412Sluigi * there is a chance that we may not use all of the slots 1439259412Sluigi * we have claimed, so we will need to handle the leftover 1440259412Sluigi * ones when we regain the lock. 1441259412Sluigi */ 1442259412Sluigi needed = d->bq_len + brddst->bq_len; 1443259412Sluigi 1444261909Sluigi if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) { 1445270063Sluigi RD(3, "virt_hdr_mismatch, src %d dst %d", na->virt_hdr_len, dst_na->virt_hdr_len); 1446261909Sluigi /* There is a virtio-net header/offloadings mismatch between 1447261909Sluigi * source and destination. The slower mismatch datapath will 1448261909Sluigi * be used to cope with all the mismatches. 1449261909Sluigi */ 1450261909Sluigi virt_hdr_mismatch = 1; 1451261909Sluigi if (dst_na->mfs < na->mfs) { 1452261909Sluigi /* We may need to do segmentation offloadings, and so 1453261909Sluigi * we may need a number of destination slots greater 1454261909Sluigi * than the number of input slots ('needed'). 1455261909Sluigi * We look for the smallest integer 'x' which satisfies: 1456261909Sluigi * needed * na->mfs + x * H <= x * na->mfs 1457261909Sluigi * where 'H' is the length of the longest header that may 1458261909Sluigi * be replicated in the segmentation process (e.g. for 1459261909Sluigi * TCPv4 we must account for ethernet header, IP header 1460261909Sluigi * and TCPv4 header). 
1461261909Sluigi */ 1462261909Sluigi needed = (needed * na->mfs) / 1463261909Sluigi (dst_na->mfs - WORST_CASE_GSO_HEADER) + 1; 1464261909Sluigi ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed); 1465261909Sluigi } 1466261909Sluigi } 1467261909Sluigi 1468259412Sluigi ND(5, "pass 2 dst %d is %x %s", 1469259412Sluigi i, d_i, is_vp ? "virtual" : "nic/host"); 1470259412Sluigi dst_nr = d_i & (NM_BDG_MAXRINGS-1); 1471259412Sluigi nrings = dst_na->up.num_rx_rings; 1472259412Sluigi if (dst_nr >= nrings) 1473259412Sluigi dst_nr = dst_nr % nrings; 1474259412Sluigi kring = &dst_na->up.rx_rings[dst_nr]; 1475259412Sluigi ring = kring->ring; 1476259412Sluigi lim = kring->nkr_num_slots - 1; 1477259412Sluigi 1478259412Sluigiretry: 1479259412Sluigi 1480261909Sluigi if (dst_na->retry && retry) { 1481261909Sluigi /* try to get some free slot from the previous run */ 1482285349Sluigi kring->nm_notify(kring, 0); 1483270063Sluigi /* actually useful only for bwraps, since there 1484270063Sluigi * the notify will trigger a txsync on the hwna. VALE ports 1485270063Sluigi * have dst_na->retry == 0 1486270063Sluigi */ 1487261909Sluigi } 1488259412Sluigi /* reserve the buffers in the queue and an entry 1489259412Sluigi * to report completion, and drop lock. 1490259412Sluigi * XXX this might become a helper function. 
1491259412Sluigi */ 1492259412Sluigi mtx_lock(&kring->q_lock); 1493259412Sluigi if (kring->nkr_stopped) { 1494259412Sluigi mtx_unlock(&kring->q_lock); 1495259412Sluigi goto cleanup; 1496259412Sluigi } 1497259412Sluigi my_start = j = kring->nkr_hwlease; 1498259412Sluigi howmany = nm_kr_space(kring, 1); 1499259412Sluigi if (needed < howmany) 1500259412Sluigi howmany = needed; 1501259412Sluigi lease_idx = nm_kr_lease(kring, howmany, 1); 1502259412Sluigi mtx_unlock(&kring->q_lock); 1503259412Sluigi 1504259412Sluigi /* only retry if we need more than available slots */ 1505259412Sluigi if (retry && needed <= howmany) 1506259412Sluigi retry = 0; 1507259412Sluigi 1508259412Sluigi /* copy to the destination queue */ 1509259412Sluigi while (howmany > 0) { 1510259412Sluigi struct netmap_slot *slot; 1511259412Sluigi struct nm_bdg_fwd *ft_p, *ft_end; 1512259412Sluigi u_int cnt; 1513259412Sluigi 1514259412Sluigi /* find the queue from which we pick next packet. 1515259412Sluigi * NM_FT_NULL is always higher than valid indexes 1516259412Sluigi * so we never dereference it if the other list 1517259412Sluigi * has packets (and if both are empty we never 1518259412Sluigi * get here). 
1519259412Sluigi */ 1520259412Sluigi if (next < brd_next) { 1521259412Sluigi ft_p = ft + next; 1522259412Sluigi next = ft_p->ft_next; 1523259412Sluigi } else { /* insert broadcast */ 1524259412Sluigi ft_p = ft + brd_next; 1525259412Sluigi brd_next = ft_p->ft_next; 1526259412Sluigi } 1527259412Sluigi cnt = ft_p->ft_frags; // cnt > 0 1528259412Sluigi if (unlikely(cnt > howmany)) 1529259412Sluigi break; /* no more space */ 1530259412Sluigi if (netmap_verbose && cnt > 1) 1531259412Sluigi RD(5, "rx %d frags to %d", cnt, j); 1532259412Sluigi ft_end = ft_p + cnt; 1533261909Sluigi if (unlikely(virt_hdr_mismatch)) { 1534261909Sluigi bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany); 1535261909Sluigi } else { 1536261909Sluigi howmany -= cnt; 1537261909Sluigi do { 1538261909Sluigi char *dst, *src = ft_p->ft_buf; 1539261909Sluigi size_t copy_len = ft_p->ft_len, dst_len = copy_len; 1540259412Sluigi 1541261909Sluigi slot = &ring->slot[j]; 1542270063Sluigi dst = NMB(&dst_na->up, slot); 1543259412Sluigi 1544261909Sluigi ND("send [%d] %d(%d) bytes at %s:%d", 1545261909Sluigi i, (int)copy_len, (int)dst_len, 1546261909Sluigi NM_IFPNAME(dst_ifp), j); 1547261909Sluigi /* round to a multiple of 64 */ 1548261909Sluigi copy_len = (copy_len + 63) & ~63; 1549260368Sluigi 1550270063Sluigi if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) || 1551270063Sluigi copy_len > NETMAP_BUF_SIZE(&na->up))) { 1552267151Sluigi RD(5, "invalid len %d, down to 64", (int)copy_len); 1553267151Sluigi copy_len = dst_len = 64; // XXX 1554267151Sluigi } 1555261909Sluigi if (ft_p->ft_flags & NS_INDIRECT) { 1556261909Sluigi if (copyin(src, dst, copy_len)) { 1557261909Sluigi // invalid user pointer, pretend len is 0 1558261909Sluigi dst_len = 0; 1559261909Sluigi } 1560261909Sluigi } else { 1561261909Sluigi //memcpy(dst, src, copy_len); 1562261909Sluigi pkt_copy(src, dst, (int)copy_len); 1563261909Sluigi } 1564261909Sluigi slot->len = dst_len; 1565261909Sluigi slot->flags = (cnt << 8)| NS_MOREFRAG; 
1566261909Sluigi j = nm_next(j, lim); 1567261909Sluigi needed--; 1568261909Sluigi ft_p++; 1569261909Sluigi } while (ft_p != ft_end); 1570261909Sluigi slot->flags = (cnt << 8); /* clear flag on last entry */ 1571261909Sluigi } 1572259412Sluigi /* are we done ? */ 1573259412Sluigi if (next == NM_FT_NULL && brd_next == NM_FT_NULL) 1574259412Sluigi break; 1575259412Sluigi } 1576259412Sluigi { 1577259412Sluigi /* current position */ 1578259412Sluigi uint32_t *p = kring->nkr_leases; /* shorthand */ 1579259412Sluigi uint32_t update_pos; 1580259412Sluigi int still_locked = 1; 1581259412Sluigi 1582259412Sluigi mtx_lock(&kring->q_lock); 1583259412Sluigi if (unlikely(howmany > 0)) { 1584259412Sluigi /* not used all bufs. If i am the last one 1585259412Sluigi * i can recover the slots, otherwise must 1586259412Sluigi * fill them with 0 to mark empty packets. 1587259412Sluigi */ 1588259412Sluigi ND("leftover %d bufs", howmany); 1589259412Sluigi if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) { 1590259412Sluigi /* yes i am the last one */ 1591259412Sluigi ND("roll back nkr_hwlease to %d", j); 1592259412Sluigi kring->nkr_hwlease = j; 1593259412Sluigi } else { 1594259412Sluigi while (howmany-- > 0) { 1595259412Sluigi ring->slot[j].len = 0; 1596259412Sluigi ring->slot[j].flags = 0; 1597259412Sluigi j = nm_next(j, lim); 1598259412Sluigi } 1599259412Sluigi } 1600259412Sluigi } 1601259412Sluigi p[lease_idx] = j; /* report I am done */ 1602259412Sluigi 1603260368Sluigi update_pos = kring->nr_hwtail; 1604259412Sluigi 1605259412Sluigi if (my_start == update_pos) { 1606259412Sluigi /* all slots before my_start have been reported, 1607259412Sluigi * so scan subsequent leases to see if other ranges 1608259412Sluigi * have been completed, and to a selwakeup or txsync. 
1609259412Sluigi */ 1610259412Sluigi while (lease_idx != kring->nkr_lease_idx && 1611259412Sluigi p[lease_idx] != NR_NOSLOT) { 1612259412Sluigi j = p[lease_idx]; 1613259412Sluigi p[lease_idx] = NR_NOSLOT; 1614259412Sluigi lease_idx = nm_next(lease_idx, lim); 1615259412Sluigi } 1616259412Sluigi /* j is the new 'write' position. j != my_start 1617259412Sluigi * means there are new buffers to report 1618259412Sluigi */ 1619259412Sluigi if (likely(j != my_start)) { 1620260368Sluigi kring->nr_hwtail = j; 1621259412Sluigi still_locked = 0; 1622259412Sluigi mtx_unlock(&kring->q_lock); 1623285349Sluigi kring->nm_notify(kring, 0); 1624270063Sluigi /* this is netmap_notify for VALE ports and 1625270063Sluigi * netmap_bwrap_notify for bwrap. The latter will 1626270063Sluigi * trigger a txsync on the underlying hwna 1627270063Sluigi */ 1628270063Sluigi if (dst_na->retry && retry--) { 1629270063Sluigi /* XXX this is going to call nm_notify again. 1630270063Sluigi * Only useful for bwrap in virtual machines 1631270063Sluigi */ 1632259412Sluigi goto retry; 1633270063Sluigi } 1634259412Sluigi } 1635259412Sluigi } 1636259412Sluigi if (still_locked) 1637259412Sluigi mtx_unlock(&kring->q_lock); 1638259412Sluigi } 1639259412Sluigicleanup: 1640259412Sluigi d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */ 1641259412Sluigi d->bq_len = 0; 1642259412Sluigi } 1643259412Sluigi brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */ 1644259412Sluigi brddst->bq_len = 0; 1645259412Sluigi return 0; 1646259412Sluigi} 1647259412Sluigi 1648270063Sluigi/* nm_txsync callback for VALE ports */ 1649259412Sluigistatic int 1650270063Sluiginetmap_vp_txsync(struct netmap_kring *kring, int flags) 1651259412Sluigi{ 1652270063Sluigi struct netmap_vp_adapter *na = 1653270063Sluigi (struct netmap_vp_adapter *)kring->na; 1654260368Sluigi u_int done; 1655260368Sluigi u_int const lim = kring->nkr_num_slots - 1; 1656285349Sluigi u_int const head = kring->rhead; 1657259412Sluigi 1658259412Sluigi if 
(bridge_batch <= 0) { /* testing only */ 1659285349Sluigi done = head; // used all 1660259412Sluigi goto done; 1661259412Sluigi } 1662270063Sluigi if (!na->na_bdg) { 1663285349Sluigi done = head; 1664270063Sluigi goto done; 1665270063Sluigi } 1666259412Sluigi if (bridge_batch > NM_BDG_BATCH) 1667259412Sluigi bridge_batch = NM_BDG_BATCH; 1668259412Sluigi 1669285349Sluigi done = nm_bdg_preflush(kring, head); 1670259412Sluigidone: 1671285349Sluigi if (done != head) 1672285349Sluigi D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail); 1673260368Sluigi /* 1674260368Sluigi * packets between 'done' and 'cur' are left unsent. 1675260368Sluigi */ 1676260368Sluigi kring->nr_hwcur = done; 1677260368Sluigi kring->nr_hwtail = nm_prev(done, lim); 1678259412Sluigi if (netmap_verbose) 1679270063Sluigi D("%s ring %d flags %d", na->up.name, kring->ring_id, flags); 1680259412Sluigi return 0; 1681259412Sluigi} 1682259412Sluigi 1683259412Sluigi 1684270063Sluigi/* rxsync code used by VALE ports nm_rxsync callback and also 1685270063Sluigi * internally by the brwap 1686259412Sluigi */ 1687259412Sluigistatic int 1688270063Sluiginetmap_vp_rxsync_locked(struct netmap_kring *kring, int flags) 1689259412Sluigi{ 1690270063Sluigi struct netmap_adapter *na = kring->na; 1691259412Sluigi struct netmap_ring *ring = kring->ring; 1692260368Sluigi u_int nm_i, lim = kring->nkr_num_slots - 1; 1693285349Sluigi u_int head = kring->rhead; 1694259412Sluigi int n; 1695259412Sluigi 1696260368Sluigi if (head > lim) { 1697259412Sluigi D("ouch dangerous reset!!!"); 1698259412Sluigi n = netmap_ring_reinit(kring); 1699259412Sluigi goto done; 1700259412Sluigi } 1701259412Sluigi 1702260368Sluigi /* First part, import newly received packets. */ 1703260368Sluigi /* actually nothing to do here, they are already in the kring */ 1704259412Sluigi 1705260368Sluigi /* Second part, skip past packets that userspace has released. 
*/ 1706260368Sluigi nm_i = kring->nr_hwcur; 1707260368Sluigi if (nm_i != head) { 1708260368Sluigi /* consistency check, but nothing really important here */ 1709260368Sluigi for (n = 0; likely(nm_i != head); n++) { 1710260368Sluigi struct netmap_slot *slot = &ring->slot[nm_i]; 1711270063Sluigi void *addr = NMB(na, slot); 1712259412Sluigi 1713270063Sluigi if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */ 1714259412Sluigi D("bad buffer index %d, ignore ?", 1715259412Sluigi slot->buf_idx); 1716259412Sluigi } 1717259412Sluigi slot->flags &= ~NS_BUF_CHANGED; 1718260368Sluigi nm_i = nm_next(nm_i, lim); 1719259412Sluigi } 1720260368Sluigi kring->nr_hwcur = head; 1721259412Sluigi } 1722260368Sluigi 1723259412Sluigi n = 0; 1724259412Sluigidone: 1725260368Sluigi return n; 1726260368Sluigi} 1727260368Sluigi 1728260368Sluigi/* 1729270063Sluigi * nm_rxsync callback for VALE ports 1730260368Sluigi * user process reading from a VALE switch. 1731260368Sluigi * Already protected against concurrent calls from userspace, 1732260368Sluigi * but we must acquire the queue's lock to protect against 1733260368Sluigi * writers on the same queue. 1734260368Sluigi */ 1735260368Sluigistatic int 1736270063Sluiginetmap_vp_rxsync(struct netmap_kring *kring, int flags) 1737260368Sluigi{ 1738260368Sluigi int n; 1739260368Sluigi 1740260368Sluigi mtx_lock(&kring->q_lock); 1741270063Sluigi n = netmap_vp_rxsync_locked(kring, flags); 1742259412Sluigi mtx_unlock(&kring->q_lock); 1743259412Sluigi return n; 1744259412Sluigi} 1745259412Sluigi 1746260368Sluigi 1747270063Sluigi/* nm_bdg_attach callback for VALE ports 1748270063Sluigi * The na_vp port is this same netmap_adapter. There is no host port. 
1749270063Sluigi */ 1750259412Sluigistatic int 1751270063Sluiginetmap_vp_bdg_attach(const char *name, struct netmap_adapter *na) 1752259412Sluigi{ 1753270063Sluigi struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; 1754270063Sluigi 1755270063Sluigi if (vpna->na_bdg) 1756270063Sluigi return EBUSY; 1757270063Sluigi na->na_vp = vpna; 1758270063Sluigi strncpy(na->name, name, sizeof(na->name)); 1759270063Sluigi na->na_hostvp = NULL; 1760270063Sluigi return 0; 1761270063Sluigi} 1762270063Sluigi 1763270063Sluigi/* create a netmap_vp_adapter that describes a VALE port. 1764270063Sluigi * Only persistent VALE ports have a non-null ifp. 1765270063Sluigi */ 1766270063Sluigistatic int 1767270063Sluiginetmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter **ret) 1768270063Sluigi{ 1769259412Sluigi struct netmap_vp_adapter *vpna; 1770259412Sluigi struct netmap_adapter *na; 1771259412Sluigi int error; 1772261909Sluigi u_int npipes = 0; 1773259412Sluigi 1774259412Sluigi vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO); 1775259412Sluigi if (vpna == NULL) 1776259412Sluigi return ENOMEM; 1777259412Sluigi 1778259412Sluigi na = &vpna->up; 1779259412Sluigi 1780259412Sluigi na->ifp = ifp; 1781270063Sluigi strncpy(na->name, nmr->nr_name, sizeof(na->name)); 1782259412Sluigi 1783259412Sluigi /* bound checking */ 1784259412Sluigi na->num_tx_rings = nmr->nr_tx_rings; 1785259412Sluigi nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 1786259412Sluigi nmr->nr_tx_rings = na->num_tx_rings; // write back 1787259412Sluigi na->num_rx_rings = nmr->nr_rx_rings; 1788259412Sluigi nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 1789259412Sluigi nmr->nr_rx_rings = na->num_rx_rings; // write back 1790259412Sluigi nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE, 1791259412Sluigi 1, NM_BDG_MAXSLOTS, NULL); 1792259412Sluigi na->num_tx_desc = nmr->nr_tx_slots; 1793259412Sluigi nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE, 
1794259412Sluigi 1, NM_BDG_MAXSLOTS, NULL); 1795261909Sluigi /* validate number of pipes. We want at least 1, 1796261909Sluigi * but probably can do with some more. 1797261909Sluigi * So let's use 2 as default (when 0 is supplied) 1798261909Sluigi */ 1799261909Sluigi npipes = nmr->nr_arg1; 1800261909Sluigi nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL); 1801261909Sluigi nmr->nr_arg1 = npipes; /* write back */ 1802261909Sluigi /* validate extra bufs */ 1803261909Sluigi nm_bound_var(&nmr->nr_arg3, 0, 0, 1804261909Sluigi 128*NM_BDG_MAXSLOTS, NULL); 1805259412Sluigi na->num_rx_desc = nmr->nr_rx_slots; 1806261909Sluigi vpna->virt_hdr_len = 0; 1807261909Sluigi vpna->mfs = 1514; 1808285349Sluigi vpna->last_smac = ~0llu; 1809261909Sluigi /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero?? 1810261909Sluigi vpna->mfs = netmap_buf_size; */ 1811261909Sluigi if (netmap_verbose) 1812261909Sluigi D("max frame size %u", vpna->mfs); 1813259412Sluigi 1814285349Sluigi na->na_flags |= NAF_BDG_MAYSLEEP; 1815285698Sluigi /* persistent VALE ports look like hw devices 1816285698Sluigi * with a native netmap adapter 1817285698Sluigi */ 1818285698Sluigi if (ifp) 1819285698Sluigi na->na_flags |= NAF_NATIVE; 1820270063Sluigi na->nm_txsync = netmap_vp_txsync; 1821270063Sluigi na->nm_rxsync = netmap_vp_rxsync; 1822270063Sluigi na->nm_register = netmap_vp_reg; 1823259412Sluigi na->nm_krings_create = netmap_vp_krings_create; 1824259412Sluigi na->nm_krings_delete = netmap_vp_krings_delete; 1825270063Sluigi na->nm_dtor = netmap_vp_dtor; 1826270063Sluigi na->nm_mem = netmap_mem_private_new(na->name, 1827259412Sluigi na->num_tx_rings, na->num_tx_desc, 1828261909Sluigi na->num_rx_rings, na->num_rx_desc, 1829261909Sluigi nmr->nr_arg3, npipes, &error); 1830261909Sluigi if (na->nm_mem == NULL) 1831261909Sluigi goto err; 1832270063Sluigi na->nm_bdg_attach = netmap_vp_bdg_attach; 1833259412Sluigi /* other nmd fields are set in the common routine */ 1834259412Sluigi error = 
netmap_attach_common(na); 1835261909Sluigi if (error) 1836261909Sluigi goto err; 1837270063Sluigi *ret = vpna; 1838259412Sluigi return 0; 1839261909Sluigi 1840261909Sluigierr: 1841261909Sluigi if (na->nm_mem != NULL) 1842285349Sluigi netmap_mem_delete(na->nm_mem); 1843261909Sluigi free(vpna, M_DEVBUF); 1844261909Sluigi return error; 1845259412Sluigi} 1846259412Sluigi 1847270063Sluigi/* Bridge wrapper code (bwrap). 1848270063Sluigi * This is used to connect a non-VALE-port netmap_adapter (hwna) to a 1849270063Sluigi * VALE switch. 1850270063Sluigi * The main task is to swap the meaning of tx and rx rings to match the 1851270063Sluigi * expectations of the VALE switch code (see nm_bdg_flush). 1852270063Sluigi * 1853270063Sluigi * The bwrap works by interposing a netmap_bwrap_adapter between the 1854270063Sluigi * rest of the system and the hwna. The netmap_bwrap_adapter looks like 1855270063Sluigi * a netmap_vp_adapter to the rest the system, but, internally, it 1856270063Sluigi * translates all callbacks to what the hwna expects. 1857270063Sluigi * 1858270063Sluigi * Note that we have to intercept callbacks coming from two sides: 1859270063Sluigi * 1860270063Sluigi * - callbacks coming from the netmap module are intercepted by 1861270063Sluigi * passing around the netmap_bwrap_adapter instead of the hwna 1862270063Sluigi * 1863270063Sluigi * - callbacks coming from outside of the netmap module only know 1864270063Sluigi * about the hwna. This, however, only happens in interrupt 1865270063Sluigi * handlers, where only the hwna->nm_notify callback is called. 1866270063Sluigi * What the bwrap does is to overwrite the hwna->nm_notify callback 1867270063Sluigi * with its own netmap_bwrap_intr_notify. 1868270063Sluigi * XXX This assumes that the hwna->nm_notify callback was the 1869270063Sluigi * standard netmap_notify(), as it is the case for nic adapters. 
1870270063Sluigi * Any additional action performed by hwna->nm_notify will not be 1871270063Sluigi * performed by netmap_bwrap_intr_notify. 1872270063Sluigi * 1873270063Sluigi * Additionally, the bwrap can optionally attach the host rings pair 1874270063Sluigi * of the wrapped adapter to a different port of the switch. 1875270063Sluigi */ 1876260368Sluigi 1877270063Sluigi 1878259412Sluigistatic void 1879259412Sluiginetmap_bwrap_dtor(struct netmap_adapter *na) 1880259412Sluigi{ 1881259412Sluigi struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; 1882259412Sluigi struct netmap_adapter *hwna = bna->hwna; 1883259412Sluigi 1884259412Sluigi ND("na %p", na); 1885270063Sluigi /* drop reference to hwna->ifp. 1886270063Sluigi * If we don't do this, netmap_detach_common(na) 1887270063Sluigi * will think it has set NA(na->ifp) to NULL 1888270063Sluigi */ 1889270063Sluigi na->ifp = NULL; 1890270063Sluigi /* for safety, also drop the possible reference 1891270063Sluigi * in the hostna 1892270063Sluigi */ 1893270063Sluigi bna->host.up.ifp = NULL; 1894259412Sluigi 1895270063Sluigi hwna->nm_mem = bna->save_nmd; 1896259412Sluigi hwna->na_private = NULL; 1897270063Sluigi hwna->na_vp = hwna->na_hostvp = NULL; 1898270063Sluigi hwna->na_flags &= ~NAF_BUSY; 1899259412Sluigi netmap_adapter_put(hwna); 1900259412Sluigi 1901259412Sluigi} 1902259412Sluigi 1903260368Sluigi 1904259412Sluigi/* 1905260368Sluigi * Intr callback for NICs connected to a bridge. 1906260368Sluigi * Simply ignore tx interrupts (maybe we could try to recover space ?) 1907260368Sluigi * and pass received packets from nic to the bridge. 1908260368Sluigi * 1909259412Sluigi * XXX TODO check locking: this is called from the interrupt 1910259412Sluigi * handler so we should make sure that the interface is not 1911259412Sluigi * disconnected while passing down an interrupt. 1912259412Sluigi * 1913260368Sluigi * Note, no user process can access this NIC or the host stack. 
1914260368Sluigi * The only part of the ring that is significant are the slots, 1915260368Sluigi * and head/cur/tail are set from the kring as needed 1916260368Sluigi * (part as a receive ring, part as a transmit ring). 1917260368Sluigi * 1918260368Sluigi * callback that overwrites the hwna notify callback. 1919259412Sluigi * Packets come from the outside or from the host stack and are put on an hwna rx ring. 1920259412Sluigi * The bridge wrapper then sends the packets through the bridge. 1921259412Sluigi */ 1922259412Sluigistatic int 1923285349Sluiginetmap_bwrap_intr_notify(struct netmap_kring *kring, int flags) 1924259412Sluigi{ 1925285349Sluigi struct netmap_adapter *na = kring->na; 1926259412Sluigi struct netmap_bwrap_adapter *bna = na->na_private; 1927285349Sluigi struct netmap_kring *bkring; 1928259412Sluigi struct netmap_vp_adapter *vpna = &bna->up; 1929285349Sluigi u_int ring_nr = kring->ring_id; 1930259412Sluigi int error = 0; 1931259412Sluigi 1932260368Sluigi if (netmap_verbose) 1933285349Sluigi D("%s %s 0x%x", na->name, kring->name, flags); 1934259412Sluigi 1935270063Sluigi if (!nm_netmap_on(na)) 1936259412Sluigi return 0; 1937259412Sluigi 1938285349Sluigi bkring = &vpna->up.tx_rings[ring_nr]; 1939259412Sluigi 1940259412Sluigi /* make sure the ring is not disabled */ 1941259412Sluigi if (nm_kr_tryget(kring)) 1942259412Sluigi return 0; 1943259412Sluigi 1944260368Sluigi if (netmap_verbose) 1945285349Sluigi D("%s head %d cur %d tail %d", na->name, 1946260368Sluigi kring->rhead, kring->rcur, kring->rtail); 1947260368Sluigi 1948285349Sluigi /* simulate a user wakeup on the rx ring 1949285349Sluigi * fetch packets that have arrived. 
1950261909Sluigi */ 1951261909Sluigi error = kring->nm_sync(kring, 0); 1952261909Sluigi if (error) 1953261909Sluigi goto put_out; 1954260368Sluigi if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) { 1955259412Sluigi D("how strange, interrupt with no packets on %s", 1956270063Sluigi na->name); 1957259412Sluigi goto put_out; 1958259412Sluigi } 1959260368Sluigi 1960285349Sluigi /* new packets are kring->rcur to kring->nr_hwtail, and the bkring 1961285349Sluigi * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail 1962260368Sluigi * to push all packets out. 1963260368Sluigi */ 1964285349Sluigi bkring->rhead = bkring->rcur = kring->nr_hwtail; 1965260368Sluigi 1966270063Sluigi netmap_vp_txsync(bkring, flags); 1967259412Sluigi 1968260368Sluigi /* mark all buffers as released on this ring */ 1969285349Sluigi kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail; 1970260368Sluigi /* another call to actually release the buffers */ 1971285349Sluigi error = kring->nm_sync(kring, 0); 1972259412Sluigi 1973259412Sluigiput_out: 1974259412Sluigi nm_kr_put(kring); 1975259412Sluigi return error; 1976259412Sluigi} 1977259412Sluigi 1978260368Sluigi 1979270063Sluigi/* nm_register callback for bwrap */ 1980259412Sluigistatic int 1981259412Sluiginetmap_bwrap_register(struct netmap_adapter *na, int onoff) 1982259412Sluigi{ 1983259412Sluigi struct netmap_bwrap_adapter *bna = 1984259412Sluigi (struct netmap_bwrap_adapter *)na; 1985259412Sluigi struct netmap_adapter *hwna = bna->hwna; 1986259412Sluigi struct netmap_vp_adapter *hostna = &bna->host; 1987259412Sluigi int error; 1988285349Sluigi enum txrx t; 1989259412Sluigi 1990270063Sluigi ND("%s %s", na->name, onoff ? "on" : "off"); 1991259412Sluigi 1992259412Sluigi if (onoff) { 1993259412Sluigi int i; 1994259412Sluigi 1995270063Sluigi /* netmap_do_regif has been called on the bwrap na. 
1996270063Sluigi * We need to pass the information about the 1997270063Sluigi * memory allocator down to the hwna before 1998270063Sluigi * putting it in netmap mode 1999270063Sluigi */ 2000259412Sluigi hwna->na_lut = na->na_lut; 2001259412Sluigi 2002259412Sluigi if (hostna->na_bdg) { 2003270063Sluigi /* if the host rings have been attached to switch, 2004270063Sluigi * we need to copy the memory allocator information 2005270063Sluigi * in the hostna also 2006270063Sluigi */ 2007259412Sluigi hostna->up.na_lut = na->na_lut; 2008259412Sluigi } 2009259412Sluigi 2010260516Sluigi /* cross-link the netmap rings 2011260516Sluigi * The original number of rings comes from hwna, 2012260516Sluigi * rx rings on one side equals tx rings on the other. 2013270063Sluigi * We need to do this now, after the initialization 2014270063Sluigi * of the kring->ring pointers 2015260516Sluigi */ 2016285349Sluigi for_rx_tx(t) { 2017285349Sluigi enum txrx r= nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ 2018285349Sluigi for (i = 0; i < nma_get_nrings(na, r) + 1; i++) { 2019285349Sluigi NMR(hwna, t)[i].nkr_num_slots = NMR(na, r)[i].nkr_num_slots; 2020285349Sluigi NMR(hwna, t)[i].ring = NMR(na, r)[i].ring; 2021285349Sluigi } 2022259412Sluigi } 2023259412Sluigi } 2024259412Sluigi 2025270063Sluigi /* forward the request to the hwna */ 2026270063Sluigi error = hwna->nm_register(hwna, onoff); 2027270063Sluigi if (error) 2028270063Sluigi return error; 2029259412Sluigi 2030270063Sluigi /* impersonate a netmap_vp_adapter */ 2031270063Sluigi netmap_vp_reg(na, onoff); 2032270063Sluigi if (hostna->na_bdg) 2033270063Sluigi netmap_vp_reg(&hostna->up, onoff); 2034259412Sluigi 2035259412Sluigi if (onoff) { 2036285349Sluigi u_int i; 2037285349Sluigi /* intercept the hwna nm_nofify callback on the hw rings */ 2038285349Sluigi for (i = 0; i < hwna->num_rx_rings; i++) { 2039285349Sluigi hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify; 2040285349Sluigi hwna->rx_rings[i].nm_notify = 
netmap_bwrap_intr_notify; 2041285349Sluigi } 2042285349Sluigi i = hwna->num_rx_rings; /* for safety */ 2043285349Sluigi /* save the host ring notify unconditionally */ 2044285349Sluigi hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify; 2045285349Sluigi if (hostna->na_bdg) { 2046285349Sluigi /* also intercept the host ring notify */ 2047285349Sluigi hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify; 2048285349Sluigi } 2049259412Sluigi } else { 2050285349Sluigi u_int i; 2051285349Sluigi /* reset all notify callbacks (including host ring) */ 2052285349Sluigi for (i = 0; i <= hwna->num_rx_rings; i++) { 2053285349Sluigi hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify; 2054285349Sluigi hwna->rx_rings[i].save_notify = NULL; 2055285349Sluigi } 2056285349Sluigi hwna->na_lut.lut = NULL; 2057285349Sluigi hwna->na_lut.objtotal = 0; 2058285349Sluigi hwna->na_lut.objsize = 0; 2059259412Sluigi } 2060259412Sluigi 2061259412Sluigi return 0; 2062259412Sluigi} 2063259412Sluigi 2064270063Sluigi/* nm_config callback for bwrap */ 2065259412Sluigistatic int 2066259412Sluiginetmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, 2067259412Sluigi u_int *rxr, u_int *rxd) 2068259412Sluigi{ 2069259412Sluigi struct netmap_bwrap_adapter *bna = 2070259412Sluigi (struct netmap_bwrap_adapter *)na; 2071259412Sluigi struct netmap_adapter *hwna = bna->hwna; 2072259412Sluigi 2073259412Sluigi /* forward the request */ 2074259412Sluigi netmap_update_config(hwna); 2075259412Sluigi /* swap the results */ 2076259412Sluigi *txr = hwna->num_rx_rings; 2077259412Sluigi *txd = hwna->num_rx_desc; 2078259412Sluigi *rxr = hwna->num_tx_rings; 2079259412Sluigi *rxd = hwna->num_rx_desc; 2080259412Sluigi 2081259412Sluigi return 0; 2082259412Sluigi} 2083259412Sluigi 2084260368Sluigi 2085270063Sluigi/* nm_krings_create callback for bwrap */ 2086259412Sluigistatic int 2087259412Sluiginetmap_bwrap_krings_create(struct netmap_adapter *na) 2088259412Sluigi{ 2089259412Sluigi struct 
netmap_bwrap_adapter *bna = 2090259412Sluigi (struct netmap_bwrap_adapter *)na; 2091259412Sluigi struct netmap_adapter *hwna = bna->hwna; 2092259412Sluigi struct netmap_adapter *hostna = &bna->host.up; 2093259412Sluigi int error; 2094259412Sluigi 2095270063Sluigi ND("%s", na->name); 2096259412Sluigi 2097270063Sluigi /* impersonate a netmap_vp_adapter */ 2098259412Sluigi error = netmap_vp_krings_create(na); 2099259412Sluigi if (error) 2100259412Sluigi return error; 2101259412Sluigi 2102270063Sluigi /* also create the hwna krings */ 2103259412Sluigi error = hwna->nm_krings_create(hwna); 2104259412Sluigi if (error) { 2105259412Sluigi netmap_vp_krings_delete(na); 2106259412Sluigi return error; 2107259412Sluigi } 2108270063Sluigi /* the connection between the bwrap krings and the hwna krings 2109270063Sluigi * will be perfomed later, in the nm_register callback, since 2110270063Sluigi * now the kring->ring pointers have not been initialized yet 2111270063Sluigi */ 2112259412Sluigi 2113261909Sluigi if (na->na_flags & NAF_HOST_RINGS) { 2114270063Sluigi /* the hostna rings are the host rings of the bwrap. 
2115270063Sluigi * The corresponding krings must point back to the 2116270063Sluigi * hostna 2117270063Sluigi */ 2118285349Sluigi hostna->tx_rings = &na->tx_rings[na->num_tx_rings]; 2119270063Sluigi hostna->tx_rings[0].na = hostna; 2120285349Sluigi hostna->rx_rings = &na->rx_rings[na->num_rx_rings]; 2121270063Sluigi hostna->rx_rings[0].na = hostna; 2122261909Sluigi } 2123259412Sluigi 2124259412Sluigi return 0; 2125259412Sluigi} 2126259412Sluigi 2127260368Sluigi 2128259412Sluigistatic void 2129259412Sluiginetmap_bwrap_krings_delete(struct netmap_adapter *na) 2130259412Sluigi{ 2131259412Sluigi struct netmap_bwrap_adapter *bna = 2132259412Sluigi (struct netmap_bwrap_adapter *)na; 2133259412Sluigi struct netmap_adapter *hwna = bna->hwna; 2134259412Sluigi 2135270063Sluigi ND("%s", na->name); 2136259412Sluigi 2137259412Sluigi hwna->nm_krings_delete(hwna); 2138259412Sluigi netmap_vp_krings_delete(na); 2139259412Sluigi} 2140259412Sluigi 2141260368Sluigi 2142259412Sluigi/* notify method for the bridge-->hwna direction */ 2143259412Sluigistatic int 2144285349Sluiginetmap_bwrap_notify(struct netmap_kring *kring, int flags) 2145259412Sluigi{ 2146285349Sluigi struct netmap_adapter *na = kring->na; 2147285349Sluigi struct netmap_bwrap_adapter *bna = na->na_private; 2148259412Sluigi struct netmap_adapter *hwna = bna->hwna; 2149285349Sluigi u_int ring_n = kring->ring_id; 2150285349Sluigi u_int lim = kring->nkr_num_slots - 1; 2151285349Sluigi struct netmap_kring *hw_kring; 2152259412Sluigi int error = 0; 2153259412Sluigi 2154285349Sluigi ND("%s: na %s hwna %s", 2155285349Sluigi (kring ? kring->name : "NULL!"), 2156285349Sluigi (na ? na->name : "NULL!"), 2157285349Sluigi (hwna ? 
hwna->name : "NULL!")); 2158259412Sluigi hw_kring = &hwna->tx_rings[ring_n]; 2159259412Sluigi 2160285349Sluigi if (nm_kr_tryget(hw_kring)) 2161285349Sluigi return 0; 2162285349Sluigi 2163270063Sluigi if (!nm_netmap_on(hwna)) 2164259412Sluigi return 0; 2165260368Sluigi /* first step: simulate a user wakeup on the rx ring */ 2166285349Sluigi netmap_vp_rxsync(kring, flags); 2167260368Sluigi ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 2168270063Sluigi na->name, ring_n, 2169260368Sluigi kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 2170260368Sluigi ring->head, ring->cur, ring->tail, 2171260368Sluigi hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail); 2172285349Sluigi /* second step: the new packets are sent on the tx ring 2173260368Sluigi * (which is actually the same ring) 2174260368Sluigi */ 2175285349Sluigi hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail; 2176261909Sluigi error = hw_kring->nm_sync(hw_kring, flags); 2177285349Sluigi if (error) 2178285349Sluigi goto out; 2179260368Sluigi 2180285349Sluigi /* third step: now we are back the rx ring */ 2181260368Sluigi /* claim ownership on all hw owned bufs */ 2182285349Sluigi kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */ 2183260368Sluigi 2184285349Sluigi /* fourth step: the user goes to sleep again, causing another rxsync */ 2185285349Sluigi netmap_vp_rxsync(kring, flags); 2186260368Sluigi ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 2187270063Sluigi na->name, ring_n, 2188260368Sluigi kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 2189260368Sluigi ring->head, ring->cur, ring->tail, 2190260368Sluigi hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); 2191285349Sluigiout: 2192285349Sluigi nm_kr_put(hw_kring); 2193259412Sluigi return error; 2194259412Sluigi} 2195259412Sluigi 2196260368Sluigi 2197270063Sluigi/* nm_bdg_ctl callback for the bwrap. 
2198270063Sluigi * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd]. 2199270063Sluigi * On attach, it needs to provide a fake netmap_priv_d structure and 2200270063Sluigi * perform a netmap_do_regif() on the bwrap. This will put both the 2201270063Sluigi * bwrap and the hwna in netmap mode, with the netmap rings shared 2202270063Sluigi * and cross linked. Moroever, it will start intercepting interrupts 2203270063Sluigi * directed to hwna. 2204270063Sluigi */ 2205259412Sluigistatic int 2206270063Sluiginetmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) 2207259412Sluigi{ 2208270063Sluigi struct netmap_priv_d *npriv; 2209270063Sluigi struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; 2210270063Sluigi int error = 0; 2211270063Sluigi 2212270063Sluigi if (attach) { 2213270063Sluigi if (NETMAP_OWNED_BY_ANY(na)) { 2214270063Sluigi return EBUSY; 2215270063Sluigi } 2216270063Sluigi if (bna->na_kpriv) { 2217270063Sluigi /* nothing to do */ 2218270063Sluigi return 0; 2219270063Sluigi } 2220270063Sluigi npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO); 2221270063Sluigi if (npriv == NULL) 2222270063Sluigi return ENOMEM; 2223285349Sluigi error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags); 2224285349Sluigi if (error) { 2225270063Sluigi bzero(npriv, sizeof(*npriv)); 2226270063Sluigi free(npriv, M_DEVBUF); 2227270063Sluigi return error; 2228270063Sluigi } 2229270063Sluigi bna->na_kpriv = npriv; 2230270063Sluigi na->na_flags |= NAF_BUSY; 2231270063Sluigi } else { 2232270063Sluigi int last_instance; 2233270063Sluigi 2234270063Sluigi if (na->active_fds == 0) /* not registered */ 2235270063Sluigi return EINVAL; 2236270063Sluigi last_instance = netmap_dtor_locked(bna->na_kpriv); 2237270063Sluigi if (!last_instance) { 2238270063Sluigi D("--- error, trying to detach an entry with active mmaps"); 2239270063Sluigi error = EINVAL; 2240270063Sluigi } else { 2241270063Sluigi struct nm_bridge *b = 
bna->up.na_bdg, 2242270063Sluigi *bh = bna->host.na_bdg; 2243270063Sluigi npriv = bna->na_kpriv; 2244270063Sluigi bna->na_kpriv = NULL; 2245270063Sluigi D("deleting priv"); 2246270063Sluigi 2247270063Sluigi bzero(npriv, sizeof(*npriv)); 2248270063Sluigi free(npriv, M_DEVBUF); 2249270063Sluigi if (b) { 2250270063Sluigi /* XXX the bwrap dtor should take care 2251270063Sluigi * of this (2014-06-16) 2252270063Sluigi */ 2253270063Sluigi netmap_bdg_detach_common(b, bna->up.bdg_port, 2254270063Sluigi (bh ? bna->host.bdg_port : -1)); 2255270063Sluigi } 2256270063Sluigi na->na_flags &= ~NAF_BUSY; 2257270063Sluigi } 2258270063Sluigi } 2259270063Sluigi return error; 2260270063Sluigi 2261270063Sluigi} 2262270063Sluigi 2263270063Sluigi/* attach a bridge wrapper to the 'real' device */ 2264270063Sluigiint 2265270063Sluiginetmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) 2266270063Sluigi{ 2267259412Sluigi struct netmap_bwrap_adapter *bna; 2268270063Sluigi struct netmap_adapter *na = NULL; 2269270063Sluigi struct netmap_adapter *hostna = NULL; 2270270063Sluigi int error = 0; 2271285349Sluigi enum txrx t; 2272259412Sluigi 2273270063Sluigi /* make sure the NIC is not already in use */ 2274270063Sluigi if (NETMAP_OWNED_BY_ANY(hwna)) { 2275270063Sluigi D("NIC %s busy, cannot attach to bridge", hwna->name); 2276270063Sluigi return EBUSY; 2277270063Sluigi } 2278259412Sluigi 2279259412Sluigi bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO); 2280270063Sluigi if (bna == NULL) { 2281259412Sluigi return ENOMEM; 2282270063Sluigi } 2283259412Sluigi 2284259412Sluigi na = &bna->up.up; 2285285349Sluigi na->na_private = bna; 2286270063Sluigi strncpy(na->name, nr_name, sizeof(na->name)); 2287259412Sluigi /* fill the ring data for the bwrap adapter with rx/tx meanings 2288259412Sluigi * swapped. The real cross-linking will be done during register, 2289259412Sluigi * when all the krings will have been created. 
2290259412Sluigi */ 2291285349Sluigi for_rx_tx(t) { 2292285349Sluigi enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ 2293285349Sluigi nma_set_nrings(na, t, nma_get_nrings(hwna, r)); 2294285349Sluigi nma_set_ndesc(na, t, nma_get_ndesc(hwna, r)); 2295285349Sluigi } 2296259412Sluigi na->nm_dtor = netmap_bwrap_dtor; 2297259412Sluigi na->nm_register = netmap_bwrap_register; 2298259412Sluigi // na->nm_txsync = netmap_bwrap_txsync; 2299259412Sluigi // na->nm_rxsync = netmap_bwrap_rxsync; 2300259412Sluigi na->nm_config = netmap_bwrap_config; 2301259412Sluigi na->nm_krings_create = netmap_bwrap_krings_create; 2302259412Sluigi na->nm_krings_delete = netmap_bwrap_krings_delete; 2303259412Sluigi na->nm_notify = netmap_bwrap_notify; 2304270063Sluigi na->nm_bdg_ctl = netmap_bwrap_bdg_ctl; 2305270063Sluigi na->pdev = hwna->pdev; 2306270063Sluigi na->nm_mem = netmap_mem_private_new(na->name, 2307270063Sluigi na->num_tx_rings, na->num_tx_desc, 2308270063Sluigi na->num_rx_rings, na->num_rx_desc, 2309270063Sluigi 0, 0, &error); 2310270063Sluigi na->na_flags |= NAF_MEM_OWNER; 2311270063Sluigi if (na->nm_mem == NULL) 2312270063Sluigi goto err_put; 2313259412Sluigi bna->up.retry = 1; /* XXX maybe this should depend on the hwna */ 2314259412Sluigi 2315259412Sluigi bna->hwna = hwna; 2316259412Sluigi netmap_adapter_get(hwna); 2317259412Sluigi hwna->na_private = bna; /* weak reference */ 2318270063Sluigi hwna->na_vp = &bna->up; 2319270063Sluigi 2320261909Sluigi if (hwna->na_flags & NAF_HOST_RINGS) { 2321270063Sluigi if (hwna->na_flags & NAF_SW_ONLY) 2322270063Sluigi na->na_flags |= NAF_SW_ONLY; 2323261909Sluigi na->na_flags |= NAF_HOST_RINGS; 2324261909Sluigi hostna = &bna->host.up; 2325270063Sluigi snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name); 2326261909Sluigi hostna->ifp = hwna->ifp; 2327285349Sluigi for_rx_tx(t) { 2328285349Sluigi enum txrx r = nm_txrx_swap(t); 2329285349Sluigi nma_set_nrings(hostna, t, 1); 2330285349Sluigi nma_set_ndesc(hostna, t, 
nma_get_ndesc(hwna, r)); 2331285349Sluigi } 2332261909Sluigi // hostna->nm_txsync = netmap_bwrap_host_txsync; 2333261909Sluigi // hostna->nm_rxsync = netmap_bwrap_host_rxsync; 2334285349Sluigi hostna->nm_notify = netmap_bwrap_notify; 2335261909Sluigi hostna->nm_mem = na->nm_mem; 2336261909Sluigi hostna->na_private = bna; 2337270063Sluigi hostna->na_vp = &bna->up; 2338270063Sluigi na->na_hostvp = hwna->na_hostvp = 2339270063Sluigi hostna->na_hostvp = &bna->host; 2340270063Sluigi hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */ 2341261909Sluigi } 2342259412Sluigi 2343260368Sluigi ND("%s<->%s txr %d txd %d rxr %d rxd %d", 2344270063Sluigi na->name, ifp->if_xname, 2345259412Sluigi na->num_tx_rings, na->num_tx_desc, 2346259412Sluigi na->num_rx_rings, na->num_rx_desc); 2347259412Sluigi 2348259412Sluigi error = netmap_attach_common(na); 2349259412Sluigi if (error) { 2350270063Sluigi goto err_free; 2351259412Sluigi } 2352270063Sluigi /* make bwrap ifp point to the real ifp 2353270063Sluigi * NOTE: netmap_attach_common() interprets a non-NULL na->ifp 2354270063Sluigi * as a request to make the ifp point to the na. 
Since we 2355270063Sluigi * do not want to change the na already pointed to by hwna->ifp, 2356270063Sluigi * the following assignment has to be delayed until now 2357270063Sluigi */ 2358270063Sluigi na->ifp = hwna->ifp; 2359270063Sluigi hwna->na_flags |= NAF_BUSY; 2360270063Sluigi /* make hwna point to the allocator we are actually using, 2361270063Sluigi * so that monitors will be able to find it 2362270063Sluigi */ 2363270063Sluigi bna->save_nmd = hwna->nm_mem; 2364270063Sluigi hwna->nm_mem = na->nm_mem; 2365259412Sluigi return 0; 2366270063Sluigi 2367270063Sluigierr_free: 2368285349Sluigi netmap_mem_delete(na->nm_mem); 2369270063Sluigierr_put: 2370270063Sluigi hwna->na_vp = hwna->na_hostvp = NULL; 2371270063Sluigi netmap_adapter_put(hwna); 2372270063Sluigi free(bna, M_DEVBUF); 2373270063Sluigi return error; 2374270063Sluigi 2375259412Sluigi} 2376259412Sluigi 2377285349Sluigistruct nm_bridge * 2378285349Sluiginetmap_init_bridges2(u_int n) 2379285349Sluigi{ 2380285349Sluigi int i; 2381285349Sluigi struct nm_bridge *b; 2382260368Sluigi 2383285349Sluigi b = malloc(sizeof(struct nm_bridge) * n, M_DEVBUF, 2384285349Sluigi M_NOWAIT | M_ZERO); 2385285349Sluigi if (b == NULL) 2386285349Sluigi return NULL; 2387285349Sluigi for (i = 0; i < n; i++) 2388285349Sluigi BDG_RWINIT(&b[i]); 2389285349Sluigi return b; 2390285349Sluigi} 2391285349Sluigi 2392259412Sluigivoid 2393285349Sluiginetmap_uninit_bridges2(struct nm_bridge *b, u_int n) 2394259412Sluigi{ 2395259412Sluigi int i; 2396285349Sluigi 2397285349Sluigi if (b == NULL) 2398285349Sluigi return; 2399285349Sluigi 2400285349Sluigi for (i = 0; i < n; i++) 2401285349Sluigi BDG_RWDESTROY(&b[i]); 2402285349Sluigi free(b, M_DEVBUF); 2403259412Sluigi} 2404285349Sluigi 2405285349Sluigiint 2406285349Sluiginetmap_init_bridges(void) 2407285349Sluigi{ 2408285349Sluigi#ifdef CONFIG_NET_NS 2409285349Sluigi return netmap_bns_register(); 2410285349Sluigi#else 2411285349Sluigi nm_bridges = netmap_init_bridges2(NM_BRIDGES); 2412285349Sluigi 
if (nm_bridges == NULL) 2413285349Sluigi return ENOMEM; 2414285349Sluigi return 0; 2415285349Sluigi#endif 2416285349Sluigi} 2417285349Sluigi 2418285349Sluigivoid 2419285349Sluiginetmap_uninit_bridges(void) 2420285349Sluigi{ 2421285349Sluigi#ifdef CONFIG_NET_NS 2422285349Sluigi netmap_bns_unregister(); 2423285349Sluigi#else 2424285349Sluigi netmap_uninit_bridges2(nm_bridges, NM_BRIDGES); 2425285349Sluigi#endif 2426285349Sluigi} 2427259412Sluigi#endif /* WITH_VALE */ 2428