netmap_vale.c revision 267128
/*
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * This module implements the VALE switch for netmap

--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring or deleting a port, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch)

 */

/*
 * OS-specific code that is used only within this file.
54259412Sluigi * Other OS-specific code that must be accessed by drivers 55259412Sluigi * is present in netmap_kern.h 56259412Sluigi */ 57259412Sluigi 58259412Sluigi#if defined(__FreeBSD__) 59259412Sluigi#include <sys/cdefs.h> /* prerequisite */ 60259412Sluigi__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap_vale.c 267128 2014-06-05 21:12:41Z luigi $"); 61259412Sluigi 62259412Sluigi#include <sys/types.h> 63259412Sluigi#include <sys/errno.h> 64259412Sluigi#include <sys/param.h> /* defines used in kernel.h */ 65259412Sluigi#include <sys/kernel.h> /* types used in module initialization */ 66259412Sluigi#include <sys/conf.h> /* cdevsw struct, UID, GID */ 67259412Sluigi#include <sys/sockio.h> 68259412Sluigi#include <sys/socketvar.h> /* struct socket */ 69259412Sluigi#include <sys/malloc.h> 70259412Sluigi#include <sys/poll.h> 71259412Sluigi#include <sys/rwlock.h> 72259412Sluigi#include <sys/socket.h> /* sockaddrs */ 73259412Sluigi#include <sys/selinfo.h> 74259412Sluigi#include <sys/sysctl.h> 75259412Sluigi#include <net/if.h> 76259412Sluigi#include <net/if_var.h> 77259412Sluigi#include <net/bpf.h> /* BIOCIMMEDIATE */ 78259412Sluigi#include <machine/bus.h> /* bus_dmamap_* */ 79259412Sluigi#include <sys/endian.h> 80259412Sluigi#include <sys/refcount.h> 81259412Sluigi 82259412Sluigi 83259412Sluigi#define BDG_RWLOCK_T struct rwlock // struct rwlock 84259412Sluigi 85259412Sluigi#define BDG_RWINIT(b) \ 86259412Sluigi rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS) 87259412Sluigi#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock) 88259412Sluigi#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock) 89259412Sluigi#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock) 90259412Sluigi#define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock) 91259412Sluigi#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock) 92259412Sluigi#define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock) 93259412Sluigi 94259412Sluigi 95259412Sluigi#elif defined(linux) 96259412Sluigi 97259412Sluigi#include "bsd_glue.h" 98259412Sluigi 
99259412Sluigi#elif defined(__APPLE__) 100259412Sluigi 101259412Sluigi#warning OSX support is only partial 102259412Sluigi#include "osx_glue.h" 103259412Sluigi 104259412Sluigi#else 105259412Sluigi 106259412Sluigi#error Unsupported platform 107259412Sluigi 108259412Sluigi#endif /* unsupported */ 109259412Sluigi 110259412Sluigi/* 111259412Sluigi * common headers 112259412Sluigi */ 113259412Sluigi 114259412Sluigi#include <net/netmap.h> 115259412Sluigi#include <dev/netmap/netmap_kern.h> 116259412Sluigi#include <dev/netmap/netmap_mem2.h> 117259412Sluigi 118259412Sluigi#ifdef WITH_VALE 119259412Sluigi 120259412Sluigi/* 121259412Sluigi * system parameters (most of them in netmap_kern.h) 122259412Sluigi * NM_NAME prefix for switch port names, default "vale" 123259412Sluigi * NM_BDG_MAXPORTS number of ports 124259412Sluigi * NM_BRIDGES max number of switches in the system. 125259412Sluigi * XXX should become a sysctl or tunable 126259412Sluigi * 127259412Sluigi * Switch ports are named valeX:Y where X is the switch name and Y 128259412Sluigi * is the port. If Y matches a physical interface name, the port is 129259412Sluigi * connected to a physical device. 130259412Sluigi * 131259412Sluigi * Unlike physical interfaces, switch ports use their own memory region 132259412Sluigi * for rings and buffers. 133259412Sluigi * The virtual interfaces use per-queue lock instead of core lock. 134259412Sluigi * In the tx loop, we aggregate traffic in batches to make all operations 135259412Sluigi * faster. The batch size is bridge_batch. 136259412Sluigi */ 137259412Sluigi#define NM_BDG_MAXRINGS 16 /* XXX unclear how many. 
*/ 138259412Sluigi#define NM_BDG_MAXSLOTS 4096 /* XXX same as above */ 139259412Sluigi#define NM_BRIDGE_RINGSIZE 1024 /* in the device */ 140259412Sluigi#define NM_BDG_HASH 1024 /* forwarding table entries */ 141259412Sluigi#define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */ 142259412Sluigi#define NM_MULTISEG 64 /* max size of a chain of bufs */ 143259412Sluigi/* actual size of the tables */ 144259412Sluigi#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG) 145259412Sluigi/* NM_FT_NULL terminates a list of slots in the ft */ 146259412Sluigi#define NM_FT_NULL NM_BDG_BATCH_MAX 147259412Sluigi#define NM_BRIDGES 8 /* number of bridges */ 148259412Sluigi 149259412Sluigi 150259412Sluigi/* 151259412Sluigi * bridge_batch is set via sysctl to the max batch size to be 152259412Sluigi * used in the bridge. The actual value may be larger as the 153259412Sluigi * last packet in the block may overflow the size. 154259412Sluigi */ 155259412Sluigiint bridge_batch = NM_BDG_BATCH; /* bridge batch size */ 156259412SluigiSYSCTL_DECL(_dev_netmap); 157259412SluigiSYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , ""); 158259412Sluigi 159259412Sluigi 160259412Sluigistatic int bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp); 161259412Sluigistatic int bdg_netmap_reg(struct netmap_adapter *na, int onoff); 162259412Sluigistatic int netmap_bwrap_attach(struct ifnet *, struct ifnet *); 163259412Sluigistatic int netmap_bwrap_register(struct netmap_adapter *, int onoff); 164259412Sluigiint kern_netmap_regif(struct nmreq *nmr); 165259412Sluigi 166259412Sluigi/* 167259412Sluigi * For each output interface, nm_bdg_q is used to construct a list. 168259412Sluigi * bq_len is the number of output buffers (we can have coalescing 169259412Sluigi * during the copy). 
170259412Sluigi */ 171259412Sluigistruct nm_bdg_q { 172259412Sluigi uint16_t bq_head; 173259412Sluigi uint16_t bq_tail; 174259412Sluigi uint32_t bq_len; /* number of buffers */ 175259412Sluigi}; 176259412Sluigi 177259412Sluigi/* XXX revise this */ 178259412Sluigistruct nm_hash_ent { 179259412Sluigi uint64_t mac; /* the top 2 bytes are the epoch */ 180259412Sluigi uint64_t ports; 181259412Sluigi}; 182259412Sluigi 183259412Sluigi/* 184259412Sluigi * nm_bridge is a descriptor for a VALE switch. 185259412Sluigi * Interfaces for a bridge are all in bdg_ports[]. 186259412Sluigi * The array has fixed size, an empty entry does not terminate 187259412Sluigi * the search, but lookups only occur on attach/detach so we 188259412Sluigi * don't mind if they are slow. 189259412Sluigi * 190259412Sluigi * The bridge is non blocking on the transmit ports: excess 191259412Sluigi * packets are dropped if there is no room on the output port. 192259412Sluigi * 193259412Sluigi * bdg_lock protects accesses to the bdg_ports array. 194259412Sluigi * This is a rw lock (or equivalent). 195259412Sluigi */ 196259412Sluigistruct nm_bridge { 197259412Sluigi /* XXX what is the proper alignment/layout ? */ 198259412Sluigi BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */ 199259412Sluigi int bdg_namelen; 200259412Sluigi uint32_t bdg_active_ports; /* 0 means free */ 201259412Sluigi char bdg_basename[IFNAMSIZ]; 202259412Sluigi 203259412Sluigi /* Indexes of active ports (up to active_ports) 204259412Sluigi * and all other remaining ports. 205259412Sluigi */ 206259412Sluigi uint8_t bdg_port_index[NM_BDG_MAXPORTS]; 207259412Sluigi 208259412Sluigi struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS]; 209259412Sluigi 210259412Sluigi 211259412Sluigi /* 212259412Sluigi * The function to decide the destination port. 213259412Sluigi * It returns either of an index of the destination port, 214259412Sluigi * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to 215259412Sluigi * forward this packet. 
ring_nr is the source ring index, and the 216259412Sluigi * function may overwrite this value to forward this packet to a 217259412Sluigi * different ring index. 218259412Sluigi * This function must be set by netmap_bdgctl(). 219259412Sluigi */ 220259412Sluigi bdg_lookup_fn_t nm_bdg_lookup; 221259412Sluigi 222259412Sluigi /* the forwarding table, MAC+ports. 223259412Sluigi * XXX should be changed to an argument to be passed to 224259412Sluigi * the lookup function, and allocated on attach 225259412Sluigi */ 226259412Sluigi struct nm_hash_ent ht[NM_BDG_HASH]; 227259412Sluigi}; 228259412Sluigi 229259412Sluigi 230259412Sluigi/* 231259412Sluigi * XXX in principle nm_bridges could be created dynamically 232259412Sluigi * Right now we have a static array and deletions are protected 233259412Sluigi * by an exclusive lock. 234259412Sluigi */ 235259412Sluigistruct nm_bridge nm_bridges[NM_BRIDGES]; 236259412Sluigi 237259412Sluigi 238259412Sluigi/* 239259412Sluigi * this is a slightly optimized copy routine which rounds 240259412Sluigi * to multiple of 64 bytes and is often faster than dealing 241259412Sluigi * with other odd sizes. We assume there is enough room 242259412Sluigi * in the source and destination buffers. 243259412Sluigi * 244259412Sluigi * XXX only for multiples of 64 bytes, non overlapped. 
245259412Sluigi */ 246259412Sluigistatic inline void 247259412Sluigipkt_copy(void *_src, void *_dst, int l) 248259412Sluigi{ 249259412Sluigi uint64_t *src = _src; 250259412Sluigi uint64_t *dst = _dst; 251259412Sluigi if (unlikely(l >= 1024)) { 252259412Sluigi memcpy(dst, src, l); 253259412Sluigi return; 254259412Sluigi } 255259412Sluigi for (; likely(l > 0); l-=64) { 256259412Sluigi *dst++ = *src++; 257259412Sluigi *dst++ = *src++; 258259412Sluigi *dst++ = *src++; 259259412Sluigi *dst++ = *src++; 260259412Sluigi *dst++ = *src++; 261259412Sluigi *dst++ = *src++; 262259412Sluigi *dst++ = *src++; 263259412Sluigi *dst++ = *src++; 264259412Sluigi } 265259412Sluigi} 266259412Sluigi 267259412Sluigi 268259412Sluigi/* 269259412Sluigi * locate a bridge among the existing ones. 270259412Sluigi * MUST BE CALLED WITH NMG_LOCK() 271259412Sluigi * 272259412Sluigi * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME. 273259412Sluigi * We assume that this is called with a name of at least NM_NAME chars. 274259412Sluigi */ 275259412Sluigistatic struct nm_bridge * 276259412Sluiginm_find_bridge(const char *name, int create) 277259412Sluigi{ 278259412Sluigi int i, l, namelen; 279259412Sluigi struct nm_bridge *b = NULL; 280259412Sluigi 281259412Sluigi NMG_LOCK_ASSERT(); 282259412Sluigi 283259412Sluigi namelen = strlen(NM_NAME); /* base length */ 284259412Sluigi l = name ? strlen(name) : 0; /* actual length */ 285259412Sluigi if (l < namelen) { 286259412Sluigi D("invalid bridge name %s", name ? 
name : NULL); 287259412Sluigi return NULL; 288259412Sluigi } 289259412Sluigi for (i = namelen + 1; i < l; i++) { 290259412Sluigi if (name[i] == ':') { 291259412Sluigi namelen = i; 292259412Sluigi break; 293259412Sluigi } 294259412Sluigi } 295259412Sluigi if (namelen >= IFNAMSIZ) 296259412Sluigi namelen = IFNAMSIZ; 297259412Sluigi ND("--- prefix is '%.*s' ---", namelen, name); 298259412Sluigi 299259412Sluigi /* lookup the name, remember empty slot if there is one */ 300259412Sluigi for (i = 0; i < NM_BRIDGES; i++) { 301259412Sluigi struct nm_bridge *x = nm_bridges + i; 302259412Sluigi 303259412Sluigi if (x->bdg_active_ports == 0) { 304259412Sluigi if (create && b == NULL) 305259412Sluigi b = x; /* record empty slot */ 306259412Sluigi } else if (x->bdg_namelen != namelen) { 307259412Sluigi continue; 308259412Sluigi } else if (strncmp(name, x->bdg_basename, namelen) == 0) { 309259412Sluigi ND("found '%.*s' at %d", namelen, name, i); 310259412Sluigi b = x; 311259412Sluigi break; 312259412Sluigi } 313259412Sluigi } 314259412Sluigi if (i == NM_BRIDGES && b) { /* name not found, can create entry */ 315259412Sluigi /* initialize the bridge */ 316259412Sluigi strncpy(b->bdg_basename, name, namelen); 317259412Sluigi ND("create new bridge %s with ports %d", b->bdg_basename, 318259412Sluigi b->bdg_active_ports); 319259412Sluigi b->bdg_namelen = namelen; 320259412Sluigi b->bdg_active_ports = 0; 321259412Sluigi for (i = 0; i < NM_BDG_MAXPORTS; i++) 322259412Sluigi b->bdg_port_index[i] = i; 323259412Sluigi /* set the default function */ 324259412Sluigi b->nm_bdg_lookup = netmap_bdg_learning; 325259412Sluigi /* reset the MAC address table */ 326259412Sluigi bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); 327259412Sluigi } 328259412Sluigi return b; 329259412Sluigi} 330259412Sluigi 331259412Sluigi 332259412Sluigi/* 333259412Sluigi * Free the forwarding tables for rings attached to switch ports. 
334259412Sluigi */ 335259412Sluigistatic void 336259412Sluiginm_free_bdgfwd(struct netmap_adapter *na) 337259412Sluigi{ 338259412Sluigi int nrings, i; 339259412Sluigi struct netmap_kring *kring; 340259412Sluigi 341259412Sluigi NMG_LOCK_ASSERT(); 342260368Sluigi nrings = na->num_tx_rings; 343260368Sluigi kring = na->tx_rings; 344259412Sluigi for (i = 0; i < nrings; i++) { 345259412Sluigi if (kring[i].nkr_ft) { 346259412Sluigi free(kring[i].nkr_ft, M_DEVBUF); 347259412Sluigi kring[i].nkr_ft = NULL; /* protect from freeing twice */ 348259412Sluigi } 349259412Sluigi } 350259412Sluigi} 351259412Sluigi 352259412Sluigi 353259412Sluigi/* 354259412Sluigi * Allocate the forwarding tables for the rings attached to the bridge ports. 355259412Sluigi */ 356259412Sluigistatic int 357259412Sluiginm_alloc_bdgfwd(struct netmap_adapter *na) 358259412Sluigi{ 359259412Sluigi int nrings, l, i, num_dstq; 360259412Sluigi struct netmap_kring *kring; 361259412Sluigi 362259412Sluigi NMG_LOCK_ASSERT(); 363259412Sluigi /* all port:rings + broadcast */ 364259412Sluigi num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1; 365259412Sluigi l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX; 366259412Sluigi l += sizeof(struct nm_bdg_q) * num_dstq; 367259412Sluigi l += sizeof(uint16_t) * NM_BDG_BATCH_MAX; 368259412Sluigi 369261909Sluigi nrings = netmap_real_tx_rings(na); 370259412Sluigi kring = na->tx_rings; 371259412Sluigi for (i = 0; i < nrings; i++) { 372259412Sluigi struct nm_bdg_fwd *ft; 373259412Sluigi struct nm_bdg_q *dstq; 374259412Sluigi int j; 375259412Sluigi 376259412Sluigi ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO); 377259412Sluigi if (!ft) { 378259412Sluigi nm_free_bdgfwd(na); 379259412Sluigi return ENOMEM; 380259412Sluigi } 381259412Sluigi dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); 382259412Sluigi for (j = 0; j < num_dstq; j++) { 383259412Sluigi dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL; 384259412Sluigi dstq[j].bq_len = 0; 385259412Sluigi } 386259412Sluigi kring[i].nkr_ft = ft; 
387259412Sluigi } 388259412Sluigi return 0; 389259412Sluigi} 390259412Sluigi 391259412Sluigi 392259412Sluigistatic void 393259412Sluiginetmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) 394259412Sluigi{ 395259412Sluigi int s_hw = hw, s_sw = sw; 396259412Sluigi int i, lim =b->bdg_active_ports; 397259412Sluigi uint8_t tmp[NM_BDG_MAXPORTS]; 398259412Sluigi 399259412Sluigi /* 400259412Sluigi New algorithm: 401259412Sluigi make a copy of bdg_port_index; 402259412Sluigi lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port 403259412Sluigi in the array of bdg_port_index, replacing them with 404259412Sluigi entries from the bottom of the array; 405259412Sluigi decrement bdg_active_ports; 406259412Sluigi acquire BDG_WLOCK() and copy back the array. 407259412Sluigi */ 408259412Sluigi 409261909Sluigi if (netmap_verbose) 410261909Sluigi D("detach %d and %d (lim %d)", hw, sw, lim); 411259412Sluigi /* make a copy of the list of active ports, update it, 412259412Sluigi * and then copy back within BDG_WLOCK(). 
413259412Sluigi */ 414259412Sluigi memcpy(tmp, b->bdg_port_index, sizeof(tmp)); 415259412Sluigi for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) { 416259412Sluigi if (hw >= 0 && tmp[i] == hw) { 417259412Sluigi ND("detach hw %d at %d", hw, i); 418259412Sluigi lim--; /* point to last active port */ 419259412Sluigi tmp[i] = tmp[lim]; /* swap with i */ 420259412Sluigi tmp[lim] = hw; /* now this is inactive */ 421259412Sluigi hw = -1; 422259412Sluigi } else if (sw >= 0 && tmp[i] == sw) { 423259412Sluigi ND("detach sw %d at %d", sw, i); 424259412Sluigi lim--; 425259412Sluigi tmp[i] = tmp[lim]; 426259412Sluigi tmp[lim] = sw; 427259412Sluigi sw = -1; 428259412Sluigi } else { 429259412Sluigi i++; 430259412Sluigi } 431259412Sluigi } 432259412Sluigi if (hw >= 0 || sw >= 0) { 433259412Sluigi D("XXX delete failed hw %d sw %d, should panic...", hw, sw); 434259412Sluigi } 435259412Sluigi 436259412Sluigi BDG_WLOCK(b); 437259412Sluigi b->bdg_ports[s_hw] = NULL; 438259412Sluigi if (s_sw >= 0) { 439259412Sluigi b->bdg_ports[s_sw] = NULL; 440259412Sluigi } 441259412Sluigi memcpy(b->bdg_port_index, tmp, sizeof(tmp)); 442259412Sluigi b->bdg_active_ports = lim; 443259412Sluigi BDG_WUNLOCK(b); 444259412Sluigi 445259412Sluigi ND("now %d active ports", lim); 446259412Sluigi if (lim == 0) { 447259412Sluigi ND("marking bridge %s as free", b->bdg_basename); 448259412Sluigi b->nm_bdg_lookup = NULL; 449259412Sluigi } 450259412Sluigi} 451259412Sluigi 452260368Sluigi 453259412Sluigistatic void 454259412Sluiginetmap_adapter_vp_dtor(struct netmap_adapter *na) 455259412Sluigi{ 456259412Sluigi struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na; 457259412Sluigi struct nm_bridge *b = vpna->na_bdg; 458259412Sluigi struct ifnet *ifp = na->ifp; 459259412Sluigi 460259412Sluigi ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount); 461259412Sluigi 462259412Sluigi if (b) { 463259412Sluigi netmap_bdg_detach_common(b, vpna->bdg_port, -1); 464259412Sluigi } 465259412Sluigi 466259412Sluigi 
bzero(ifp, sizeof(*ifp)); 467259412Sluigi free(ifp, M_DEVBUF); 468259412Sluigi na->ifp = NULL; 469259412Sluigi} 470259412Sluigi 471260368Sluigi 472260368Sluigi/* Try to get a reference to a netmap adapter attached to a VALE switch. 473260368Sluigi * If the adapter is found (or is created), this function returns 0, a 474260368Sluigi * non NULL pointer is returned into *na, and the caller holds a 475260368Sluigi * reference to the adapter. 476260368Sluigi * If an adapter is not found, then no reference is grabbed and the 477260368Sluigi * function returns an error code, or 0 if there is just a VALE prefix 478260368Sluigi * mismatch. Therefore the caller holds a reference when 479260368Sluigi * (*na != NULL && return == 0). 480260368Sluigi */ 481259412Sluigiint 482259412Sluiginetmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create) 483259412Sluigi{ 484259412Sluigi const char *name = nmr->nr_name; 485259412Sluigi struct ifnet *ifp; 486259412Sluigi int error = 0; 487259412Sluigi struct netmap_adapter *ret; 488259412Sluigi struct netmap_vp_adapter *vpna; 489259412Sluigi struct nm_bridge *b; 490259412Sluigi int i, j, cand = -1, cand2 = -1; 491259412Sluigi int needed; 492259412Sluigi 493259412Sluigi *na = NULL; /* default return value */ 494259412Sluigi 495259412Sluigi /* first try to see if this is a bridge port. */ 496259412Sluigi NMG_LOCK_ASSERT(); 497259412Sluigi if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) { 498259412Sluigi return 0; /* no error, but no VALE prefix */ 499259412Sluigi } 500259412Sluigi 501259412Sluigi b = nm_find_bridge(name, create); 502259412Sluigi if (b == NULL) { 503259412Sluigi D("no bridges available for '%s'", name); 504260700Sluigi return (create ? ENOMEM : ENXIO); 505259412Sluigi } 506259412Sluigi 507259412Sluigi /* Now we are sure that name starts with the bridge's name, 508259412Sluigi * lookup the port in the bridge. We need to scan the entire 509259412Sluigi * list. 
It is not important to hold a WLOCK on the bridge 510259412Sluigi * during the search because NMG_LOCK already guarantees 511259412Sluigi * that there are no other possible writers. 512259412Sluigi */ 513259412Sluigi 514259412Sluigi /* lookup in the local list of ports */ 515259412Sluigi for (j = 0; j < b->bdg_active_ports; j++) { 516259412Sluigi i = b->bdg_port_index[j]; 517259412Sluigi vpna = b->bdg_ports[i]; 518259412Sluigi // KASSERT(na != NULL); 519259412Sluigi ifp = vpna->up.ifp; 520259412Sluigi /* XXX make sure the name only contains one : */ 521259412Sluigi if (!strcmp(NM_IFPNAME(ifp), name)) { 522259412Sluigi netmap_adapter_get(&vpna->up); 523259412Sluigi ND("found existing if %s refs %d", name, 524259412Sluigi vpna->na_bdg_refcount); 525259412Sluigi *na = (struct netmap_adapter *)vpna; 526259412Sluigi return 0; 527259412Sluigi } 528259412Sluigi } 529259412Sluigi /* not found, should we create it? */ 530259412Sluigi if (!create) 531259412Sluigi return ENXIO; 532259412Sluigi /* yes we should, see if we have space to attach entries */ 533259412Sluigi needed = 2; /* in some cases we only need 1 */ 534259412Sluigi if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) { 535259412Sluigi D("bridge full %d, cannot create new port", b->bdg_active_ports); 536260700Sluigi return ENOMEM; 537259412Sluigi } 538259412Sluigi /* record the next two ports available, but do not allocate yet */ 539259412Sluigi cand = b->bdg_port_index[b->bdg_active_ports]; 540259412Sluigi cand2 = b->bdg_port_index[b->bdg_active_ports + 1]; 541259412Sluigi ND("+++ bridge %s port %s used %d avail %d %d", 542259412Sluigi b->bdg_basename, name, b->bdg_active_ports, cand, cand2); 543259412Sluigi 544259412Sluigi /* 545259412Sluigi * try see if there is a matching NIC with this name 546259412Sluigi * (after the bridge's name) 547259412Sluigi */ 548259412Sluigi ifp = ifunit_ref(name + b->bdg_namelen + 1); 549259412Sluigi if (!ifp) { /* this is a virtual port */ 550259412Sluigi if (nmr->nr_cmd) { 
551259412Sluigi /* nr_cmd must be 0 for a virtual port */ 552259412Sluigi return EINVAL; 553259412Sluigi } 554259412Sluigi 555259412Sluigi /* create a struct ifnet for the new port. 556259412Sluigi * need M_NOWAIT as we are under nma_lock 557259412Sluigi */ 558259412Sluigi ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO); 559259412Sluigi if (!ifp) 560259412Sluigi return ENOMEM; 561259412Sluigi 562259412Sluigi strcpy(ifp->if_xname, name); 563259412Sluigi /* bdg_netmap_attach creates a struct netmap_adapter */ 564259412Sluigi error = bdg_netmap_attach(nmr, ifp); 565259412Sluigi if (error) { 566259412Sluigi D("error %d", error); 567259412Sluigi free(ifp, M_DEVBUF); 568259412Sluigi return error; 569259412Sluigi } 570259412Sluigi ret = NA(ifp); 571259412Sluigi cand2 = -1; /* only need one port */ 572259412Sluigi } else { /* this is a NIC */ 573259412Sluigi struct ifnet *fake_ifp; 574259412Sluigi 575259412Sluigi error = netmap_get_hw_na(ifp, &ret); 576259412Sluigi if (error || ret == NULL) 577259412Sluigi goto out; 578259412Sluigi 579259412Sluigi /* make sure the NIC is not already in use */ 580259412Sluigi if (NETMAP_OWNED_BY_ANY(ret)) { 581259412Sluigi D("NIC %s busy, cannot attach to bridge", 582259412Sluigi NM_IFPNAME(ifp)); 583260700Sluigi error = EBUSY; 584259412Sluigi goto out; 585259412Sluigi } 586259412Sluigi /* create a fake interface */ 587259412Sluigi fake_ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO); 588259412Sluigi if (!fake_ifp) { 589259412Sluigi error = ENOMEM; 590259412Sluigi goto out; 591259412Sluigi } 592259412Sluigi strcpy(fake_ifp->if_xname, name); 593259412Sluigi error = netmap_bwrap_attach(fake_ifp, ifp); 594259412Sluigi if (error) { 595259412Sluigi free(fake_ifp, M_DEVBUF); 596259412Sluigi goto out; 597259412Sluigi } 598259412Sluigi ret = NA(fake_ifp); 599259412Sluigi if (nmr->nr_arg1 != NETMAP_BDG_HOST) 600259412Sluigi cand2 = -1; /* only need one port */ 601259412Sluigi if_rele(ifp); 602259412Sluigi } 603259412Sluigi vpna = 
(struct netmap_vp_adapter *)ret; 604259412Sluigi 605259412Sluigi BDG_WLOCK(b); 606259412Sluigi vpna->bdg_port = cand; 607259412Sluigi ND("NIC %p to bridge port %d", vpna, cand); 608259412Sluigi /* bind the port to the bridge (virtual ports are not active) */ 609259412Sluigi b->bdg_ports[cand] = vpna; 610259412Sluigi vpna->na_bdg = b; 611259412Sluigi b->bdg_active_ports++; 612259412Sluigi if (cand2 >= 0) { 613259412Sluigi struct netmap_vp_adapter *hostna = vpna + 1; 614259412Sluigi /* also bind the host stack to the bridge */ 615259412Sluigi b->bdg_ports[cand2] = hostna; 616259412Sluigi hostna->bdg_port = cand2; 617259412Sluigi hostna->na_bdg = b; 618259412Sluigi b->bdg_active_ports++; 619259412Sluigi ND("host %p to bridge port %d", hostna, cand2); 620259412Sluigi } 621259412Sluigi ND("if %s refs %d", name, vpna->up.na_refcount); 622259412Sluigi BDG_WUNLOCK(b); 623259412Sluigi *na = ret; 624259412Sluigi netmap_adapter_get(ret); 625259412Sluigi return 0; 626259412Sluigi 627259412Sluigiout: 628259412Sluigi if_rele(ifp); 629259412Sluigi 630259412Sluigi return error; 631259412Sluigi} 632259412Sluigi 633259412Sluigi 634259412Sluigi/* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */ 635259412Sluigistatic int 636259412Sluiginm_bdg_attach(struct nmreq *nmr) 637259412Sluigi{ 638259412Sluigi struct netmap_adapter *na; 639259412Sluigi struct netmap_if *nifp; 640259412Sluigi struct netmap_priv_d *npriv; 641259412Sluigi struct netmap_bwrap_adapter *bna; 642259412Sluigi int error; 643259412Sluigi 644259412Sluigi npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO); 645259412Sluigi if (npriv == NULL) 646259412Sluigi return ENOMEM; 647260700Sluigi 648259412Sluigi NMG_LOCK(); 649260700Sluigi 650260368Sluigi error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */); 651259412Sluigi if (error) /* no device, or another bridge or user owns the device */ 652259412Sluigi goto unlock_exit; 653260700Sluigi 654260368Sluigi if (na == NULL) { /* VALE prefix missing */ 
655259412Sluigi error = EINVAL; 656260368Sluigi goto unlock_exit; 657259412Sluigi } 658259412Sluigi 659259412Sluigi if (na->active_fds > 0) { /* already registered */ 660259412Sluigi error = EBUSY; 661259412Sluigi goto unref_exit; 662259412Sluigi } 663259412Sluigi 664261909Sluigi nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error); 665259412Sluigi if (!nifp) { 666259412Sluigi goto unref_exit; 667259412Sluigi } 668259412Sluigi 669259412Sluigi bna = (struct netmap_bwrap_adapter*)na; 670259412Sluigi bna->na_kpriv = npriv; 671259412Sluigi NMG_UNLOCK(); 672259412Sluigi ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp)); 673259412Sluigi return 0; 674259412Sluigi 675259412Sluigiunref_exit: 676259412Sluigi netmap_adapter_put(na); 677259412Sluigiunlock_exit: 678259412Sluigi NMG_UNLOCK(); 679259412Sluigi bzero(npriv, sizeof(*npriv)); 680259412Sluigi free(npriv, M_DEVBUF); 681259412Sluigi return error; 682259412Sluigi} 683259412Sluigi 684260368Sluigi 685259412Sluigistatic int 686259412Sluiginm_bdg_detach(struct nmreq *nmr) 687259412Sluigi{ 688259412Sluigi struct netmap_adapter *na; 689259412Sluigi int error; 690259412Sluigi struct netmap_bwrap_adapter *bna; 691259412Sluigi int last_instance; 692259412Sluigi 693259412Sluigi NMG_LOCK(); 694260368Sluigi error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */); 695259412Sluigi if (error) { /* no device, or another bridge or user owns the device */ 696259412Sluigi goto unlock_exit; 697259412Sluigi } 698260700Sluigi 699260368Sluigi if (na == NULL) { /* VALE prefix missing */ 700259412Sluigi error = EINVAL; 701260368Sluigi goto unlock_exit; 702259412Sluigi } 703260368Sluigi 704259412Sluigi bna = (struct netmap_bwrap_adapter *)na; 705259412Sluigi 706259412Sluigi if (na->active_fds == 0) { /* not registered */ 707259412Sluigi error = EINVAL; 708259412Sluigi goto unref_exit; 709259412Sluigi } 710259412Sluigi 711259412Sluigi last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */ 712259412Sluigi if 
(!last_instance) { 713259412Sluigi D("--- error, trying to detach an entry with active mmaps"); 714259412Sluigi error = EINVAL; 715259412Sluigi } else { 716259412Sluigi struct netmap_priv_d *npriv = bna->na_kpriv; 717259412Sluigi 718259412Sluigi bna->na_kpriv = NULL; 719259412Sluigi D("deleting priv"); 720259412Sluigi 721259412Sluigi bzero(npriv, sizeof(*npriv)); 722259412Sluigi free(npriv, M_DEVBUF); 723259412Sluigi } 724259412Sluigi 725259412Sluigiunref_exit: 726259412Sluigi netmap_adapter_put(na); 727259412Sluigiunlock_exit: 728259412Sluigi NMG_UNLOCK(); 729259412Sluigi return error; 730259412Sluigi 731259412Sluigi} 732259412Sluigi 733259412Sluigi 734259412Sluigi/* exported to kernel callers, e.g. OVS ? 735259412Sluigi * Entry point. 736259412Sluigi * Called without NMG_LOCK. 737259412Sluigi */ 738259412Sluigiint 739259412Sluiginetmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func) 740259412Sluigi{ 741259412Sluigi struct nm_bridge *b; 742259412Sluigi struct netmap_adapter *na; 743259412Sluigi struct netmap_vp_adapter *vpna; 744259412Sluigi struct ifnet *iter; 745259412Sluigi char *name = nmr->nr_name; 746259412Sluigi int cmd = nmr->nr_cmd, namelen = strlen(name); 747259412Sluigi int error = 0, i, j; 748259412Sluigi 749259412Sluigi switch (cmd) { 750259412Sluigi case NETMAP_BDG_ATTACH: 751259412Sluigi error = nm_bdg_attach(nmr); 752259412Sluigi break; 753259412Sluigi 754259412Sluigi case NETMAP_BDG_DETACH: 755259412Sluigi error = nm_bdg_detach(nmr); 756259412Sluigi break; 757259412Sluigi 758259412Sluigi case NETMAP_BDG_LIST: 759259412Sluigi /* this is used to enumerate bridges and ports */ 760259412Sluigi if (namelen) { /* look up indexes of bridge and port */ 761259412Sluigi if (strncmp(name, NM_NAME, strlen(NM_NAME))) { 762259412Sluigi error = EINVAL; 763259412Sluigi break; 764259412Sluigi } 765259412Sluigi NMG_LOCK(); 766259412Sluigi b = nm_find_bridge(name, 0 /* don't create */); 767259412Sluigi if (!b) { 768259412Sluigi error = ENOENT; 769259412Sluigi 
NMG_UNLOCK(); 770259412Sluigi break; 771259412Sluigi } 772259412Sluigi 773259412Sluigi error = ENOENT; 774259412Sluigi for (j = 0; j < b->bdg_active_ports; j++) { 775259412Sluigi i = b->bdg_port_index[j]; 776259412Sluigi vpna = b->bdg_ports[i]; 777259412Sluigi if (vpna == NULL) { 778259412Sluigi D("---AAAAAAAAARGH-------"); 779259412Sluigi continue; 780259412Sluigi } 781259412Sluigi iter = vpna->up.ifp; 782259412Sluigi /* the former and the latter identify a 783259412Sluigi * virtual port and a NIC, respectively 784259412Sluigi */ 785259412Sluigi if (!strcmp(iter->if_xname, name)) { 786259412Sluigi /* bridge index */ 787259412Sluigi nmr->nr_arg1 = b - nm_bridges; 788259412Sluigi nmr->nr_arg2 = i; /* port index */ 789259412Sluigi error = 0; 790259412Sluigi break; 791259412Sluigi } 792259412Sluigi } 793259412Sluigi NMG_UNLOCK(); 794259412Sluigi } else { 795259412Sluigi /* return the first non-empty entry starting from 796259412Sluigi * bridge nr_arg1 and port nr_arg2. 797259412Sluigi * 798259412Sluigi * Users can detect the end of the same bridge by 799259412Sluigi * seeing the new and old value of nr_arg1, and can 800259412Sluigi * detect the end of all the bridge by error != 0 801259412Sluigi */ 802259412Sluigi i = nmr->nr_arg1; 803259412Sluigi j = nmr->nr_arg2; 804259412Sluigi 805259412Sluigi NMG_LOCK(); 806259412Sluigi for (error = ENOENT; i < NM_BRIDGES; i++) { 807259412Sluigi b = nm_bridges + i; 808259412Sluigi if (j >= b->bdg_active_ports) { 809259412Sluigi j = 0; /* following bridges scan from 0 */ 810259412Sluigi continue; 811259412Sluigi } 812259412Sluigi nmr->nr_arg1 = i; 813259412Sluigi nmr->nr_arg2 = j; 814259412Sluigi j = b->bdg_port_index[j]; 815259412Sluigi vpna = b->bdg_ports[j]; 816259412Sluigi iter = vpna->up.ifp; 817259412Sluigi strncpy(name, iter->if_xname, (size_t)IFNAMSIZ); 818259412Sluigi error = 0; 819259412Sluigi break; 820259412Sluigi } 821259412Sluigi NMG_UNLOCK(); 822259412Sluigi } 823259412Sluigi break; 824259412Sluigi 825259412Sluigi 
case NETMAP_BDG_LOOKUP_REG: 826259412Sluigi /* register a lookup function to the given bridge. 827259412Sluigi * nmr->nr_name may be just bridge's name (including ':' 828259412Sluigi * if it is not just NM_NAME). 829259412Sluigi */ 830259412Sluigi if (!func) { 831259412Sluigi error = EINVAL; 832259412Sluigi break; 833259412Sluigi } 834259412Sluigi NMG_LOCK(); 835259412Sluigi b = nm_find_bridge(name, 0 /* don't create */); 836259412Sluigi if (!b) { 837259412Sluigi error = EINVAL; 838259412Sluigi } else { 839259412Sluigi b->nm_bdg_lookup = func; 840259412Sluigi } 841259412Sluigi NMG_UNLOCK(); 842259412Sluigi break; 843259412Sluigi 844261909Sluigi case NETMAP_BDG_VNET_HDR: 845261909Sluigi /* Valid lengths for the virtio-net header are 0 (no header), 846261909Sluigi 10 and 12. */ 847261909Sluigi if (nmr->nr_arg1 != 0 && 848261909Sluigi nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) && 849261909Sluigi nmr->nr_arg1 != 12) { 850261909Sluigi error = EINVAL; 851261909Sluigi break; 852261909Sluigi } 853259412Sluigi NMG_LOCK(); 854259412Sluigi error = netmap_get_bdg_na(nmr, &na, 0); 855260368Sluigi if (na && !error) { 856259412Sluigi vpna = (struct netmap_vp_adapter *)na; 857261909Sluigi vpna->virt_hdr_len = nmr->nr_arg1; 858261909Sluigi if (vpna->virt_hdr_len) 859261909Sluigi vpna->mfs = NETMAP_BDG_BUF_SIZE(na->nm_mem); 860261909Sluigi D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna); 861260368Sluigi netmap_adapter_put(na); 862259412Sluigi } 863259412Sluigi NMG_UNLOCK(); 864259412Sluigi break; 865259412Sluigi 866259412Sluigi default: 867259412Sluigi D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd); 868259412Sluigi error = EINVAL; 869259412Sluigi break; 870259412Sluigi } 871259412Sluigi return error; 872259412Sluigi} 873259412Sluigi 874259412Sluigistatic int 875259412Sluiginetmap_vp_krings_create(struct netmap_adapter *na) 876259412Sluigi{ 877261909Sluigi u_int tailroom; 878259412Sluigi int error, i; 879259412Sluigi uint32_t *leases; 880261909Sluigi u_int nrx = 
netmap_real_rx_rings(na); 881259412Sluigi 882259412Sluigi /* 883259412Sluigi * Leases are attached to RX rings on vale ports 884259412Sluigi */ 885259412Sluigi tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx; 886259412Sluigi 887261909Sluigi error = netmap_krings_create(na, tailroom); 888259412Sluigi if (error) 889259412Sluigi return error; 890259412Sluigi 891259412Sluigi leases = na->tailroom; 892259412Sluigi 893259412Sluigi for (i = 0; i < nrx; i++) { /* Receive rings */ 894259412Sluigi na->rx_rings[i].nkr_leases = leases; 895259412Sluigi leases += na->num_rx_desc; 896259412Sluigi } 897259412Sluigi 898259412Sluigi error = nm_alloc_bdgfwd(na); 899259412Sluigi if (error) { 900259412Sluigi netmap_krings_delete(na); 901259412Sluigi return error; 902259412Sluigi } 903259412Sluigi 904259412Sluigi return 0; 905259412Sluigi} 906259412Sluigi 907260368Sluigi 908259412Sluigistatic void 909259412Sluiginetmap_vp_krings_delete(struct netmap_adapter *na) 910259412Sluigi{ 911259412Sluigi nm_free_bdgfwd(na); 912259412Sluigi netmap_krings_delete(na); 913259412Sluigi} 914259412Sluigi 915259412Sluigi 916259412Sluigistatic int 917259412Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, 918259412Sluigi struct netmap_vp_adapter *na, u_int ring_nr); 919259412Sluigi 920259412Sluigi 921259412Sluigi/* 922259412Sluigi * Grab packets from a kring, move them into the ft structure 923259412Sluigi * associated to the tx (input) port. Max one instance per port, 924259412Sluigi * filtered on input (ioctl, poll or XXX). 925259412Sluigi * Returns the next position in the ring. 
926259412Sluigi */ 927259412Sluigistatic int 928259412Sluiginm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr, 929259412Sluigi struct netmap_kring *kring, u_int end) 930259412Sluigi{ 931259412Sluigi struct netmap_ring *ring = kring->ring; 932259412Sluigi struct nm_bdg_fwd *ft; 933259412Sluigi u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1; 934259412Sluigi u_int ft_i = 0; /* start from 0 */ 935259412Sluigi u_int frags = 1; /* how many frags ? */ 936259412Sluigi struct nm_bridge *b = na->na_bdg; 937259412Sluigi 938259412Sluigi /* To protect against modifications to the bridge we acquire a 939259412Sluigi * shared lock, waiting if we can sleep (if the source port is 940259412Sluigi * attached to a user process) or with a trylock otherwise (NICs). 941259412Sluigi */ 942259412Sluigi ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j); 943259412Sluigi if (na->up.na_flags & NAF_BDG_MAYSLEEP) 944259412Sluigi BDG_RLOCK(b); 945259412Sluigi else if (!BDG_RTRYLOCK(b)) 946259412Sluigi return 0; 947259412Sluigi ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j); 948259412Sluigi ft = kring->nkr_ft; 949259412Sluigi 950259412Sluigi for (; likely(j != end); j = nm_next(j, lim)) { 951259412Sluigi struct netmap_slot *slot = &ring->slot[j]; 952259412Sluigi char *buf; 953259412Sluigi 954259412Sluigi ft[ft_i].ft_len = slot->len; 955259412Sluigi ft[ft_i].ft_flags = slot->flags; 956259412Sluigi 957259412Sluigi ND("flags is 0x%x", slot->flags); 958259412Sluigi /* this slot goes into a list so initialize the link field */ 959259412Sluigi ft[ft_i].ft_next = NM_FT_NULL; 960259412Sluigi buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ? 
961259412Sluigi (void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot); 962259487Sluigi __builtin_prefetch(buf); 963259412Sluigi ++ft_i; 964259412Sluigi if (slot->flags & NS_MOREFRAG) { 965259412Sluigi frags++; 966259412Sluigi continue; 967259412Sluigi } 968259412Sluigi if (unlikely(netmap_verbose && frags > 1)) 969259412Sluigi RD(5, "%d frags at %d", frags, ft_i - frags); 970259412Sluigi ft[ft_i - frags].ft_frags = frags; 971259412Sluigi frags = 1; 972259412Sluigi if (unlikely((int)ft_i >= bridge_batch)) 973259412Sluigi ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); 974259412Sluigi } 975259412Sluigi if (frags > 1) { 976259412Sluigi D("truncate incomplete fragment at %d (%d frags)", ft_i, frags); 977259412Sluigi // ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG 978259412Sluigi ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG; 979259412Sluigi ft[ft_i - frags].ft_frags = frags - 1; 980259412Sluigi } 981259412Sluigi if (ft_i) 982259412Sluigi ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); 983259412Sluigi BDG_RUNLOCK(b); 984259412Sluigi return j; 985259412Sluigi} 986259412Sluigi 987259412Sluigi 988259412Sluigi/* ----- FreeBSD if_bridge hash function ------- */ 989259412Sluigi 990259412Sluigi/* 991259412Sluigi * The following hash function is adapted from "Hash Functions" by Bob Jenkins 992259412Sluigi * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). 
993259412Sluigi * 994259412Sluigi * http://www.burtleburtle.net/bob/hash/spooky.html 995259412Sluigi */ 996259412Sluigi#define mix(a, b, c) \ 997259412Sluigido { \ 998259412Sluigi a -= b; a -= c; a ^= (c >> 13); \ 999259412Sluigi b -= c; b -= a; b ^= (a << 8); \ 1000259412Sluigi c -= a; c -= b; c ^= (b >> 13); \ 1001259412Sluigi a -= b; a -= c; a ^= (c >> 12); \ 1002259412Sluigi b -= c; b -= a; b ^= (a << 16); \ 1003259412Sluigi c -= a; c -= b; c ^= (b >> 5); \ 1004259412Sluigi a -= b; a -= c; a ^= (c >> 3); \ 1005259412Sluigi b -= c; b -= a; b ^= (a << 10); \ 1006259412Sluigi c -= a; c -= b; c ^= (b >> 15); \ 1007259412Sluigi} while (/*CONSTCOND*/0) 1008259412Sluigi 1009260368Sluigi 1010259412Sluigistatic __inline uint32_t 1011259412Sluiginm_bridge_rthash(const uint8_t *addr) 1012259412Sluigi{ 1013259412Sluigi uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key 1014259412Sluigi 1015259412Sluigi b += addr[5] << 8; 1016259412Sluigi b += addr[4]; 1017259412Sluigi a += addr[3] << 24; 1018259412Sluigi a += addr[2] << 16; 1019259412Sluigi a += addr[1] << 8; 1020259412Sluigi a += addr[0]; 1021259412Sluigi 1022259412Sluigi mix(a, b, c); 1023259412Sluigi#define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1) 1024259412Sluigi return (c & BRIDGE_RTHASH_MASK); 1025259412Sluigi} 1026259412Sluigi 1027259412Sluigi#undef mix 1028259412Sluigi 1029259412Sluigi 1030259412Sluigistatic int 1031259412Sluigibdg_netmap_reg(struct netmap_adapter *na, int onoff) 1032259412Sluigi{ 1033259412Sluigi struct netmap_vp_adapter *vpna = 1034259412Sluigi (struct netmap_vp_adapter*)na; 1035259412Sluigi struct ifnet *ifp = na->ifp; 1036259412Sluigi 1037259412Sluigi /* the interface is already attached to the bridge, 1038259412Sluigi * so we only need to toggle IFCAP_NETMAP. 
1039259412Sluigi */ 1040259412Sluigi BDG_WLOCK(vpna->na_bdg); 1041259412Sluigi if (onoff) { 1042259412Sluigi ifp->if_capenable |= IFCAP_NETMAP; 1043259412Sluigi } else { 1044259412Sluigi ifp->if_capenable &= ~IFCAP_NETMAP; 1045259412Sluigi } 1046259412Sluigi BDG_WUNLOCK(vpna->na_bdg); 1047259412Sluigi return 0; 1048259412Sluigi} 1049259412Sluigi 1050259412Sluigi 1051259412Sluigi/* 1052259412Sluigi * Lookup function for a learning bridge. 1053259412Sluigi * Update the hash table with the source address, 1054259412Sluigi * and then returns the destination port index, and the 1055259412Sluigi * ring in *dst_ring (at the moment, always use ring 0) 1056259412Sluigi */ 1057259412Sluigiu_int 1058259412Sluiginetmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring, 1059259412Sluigi struct netmap_vp_adapter *na) 1060259412Sluigi{ 1061259412Sluigi struct nm_hash_ent *ht = na->na_bdg->ht; 1062259412Sluigi uint32_t sh, dh; 1063259412Sluigi u_int dst, mysrc = na->bdg_port; 1064259412Sluigi uint64_t smac, dmac; 1065259412Sluigi 1066259412Sluigi if (buf_len < 14) { 1067259412Sluigi D("invalid buf length %d", buf_len); 1068259412Sluigi return NM_BDG_NOPORT; 1069259412Sluigi } 1070259412Sluigi dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff; 1071259412Sluigi smac = le64toh(*(uint64_t *)(buf + 4)); 1072259412Sluigi smac >>= 16; 1073259412Sluigi 1074259412Sluigi /* 1075259412Sluigi * The hash is somewhat expensive, there might be some 1076259412Sluigi * worthwhile optimizations here. 1077259412Sluigi */ 1078259412Sluigi if ((buf[6] & 1) == 0) { /* valid src */ 1079259412Sluigi uint8_t *s = buf+6; 1080259412Sluigi sh = nm_bridge_rthash(s); // XXX hash of source 1081259412Sluigi /* update source port forwarding entry */ 1082259412Sluigi ht[sh].mac = smac; /* XXX expire ? 
*/ 1083259412Sluigi ht[sh].ports = mysrc; 1084259412Sluigi if (netmap_verbose) 1085259412Sluigi D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d", 1086259412Sluigi s[0], s[1], s[2], s[3], s[4], s[5], mysrc); 1087259412Sluigi } 1088259412Sluigi dst = NM_BDG_BROADCAST; 1089259412Sluigi if ((buf[0] & 1) == 0) { /* unicast */ 1090259412Sluigi dh = nm_bridge_rthash(buf); // XXX hash of dst 1091259412Sluigi if (ht[dh].mac == dmac) { /* found dst */ 1092259412Sluigi dst = ht[dh].ports; 1093259412Sluigi } 1094259412Sluigi /* XXX otherwise return NM_BDG_UNKNOWN ? */ 1095259412Sluigi } 1096259412Sluigi *dst_ring = 0; 1097259412Sluigi return dst; 1098259412Sluigi} 1099259412Sluigi 1100259412Sluigi 1101259412Sluigi/* 1102260368Sluigi * Available space in the ring. Only used in VALE code 1103260368Sluigi * and only with is_rx = 1 1104260368Sluigi */ 1105260368Sluigistatic inline uint32_t 1106260368Sluiginm_kr_space(struct netmap_kring *k, int is_rx) 1107260368Sluigi{ 1108260368Sluigi int space; 1109260368Sluigi 1110260368Sluigi if (is_rx) { 1111260368Sluigi int busy = k->nkr_hwlease - k->nr_hwcur; 1112260368Sluigi if (busy < 0) 1113260368Sluigi busy += k->nkr_num_slots; 1114260368Sluigi space = k->nkr_num_slots - 1 - busy; 1115260368Sluigi } else { 1116260368Sluigi /* XXX never used in this branch */ 1117260368Sluigi space = k->nr_hwtail - k->nkr_hwlease; 1118260368Sluigi if (space < 0) 1119260368Sluigi space += k->nkr_num_slots; 1120260368Sluigi } 1121260368Sluigi#if 0 1122260368Sluigi // sanity check 1123260368Sluigi if (k->nkr_hwlease >= k->nkr_num_slots || 1124260368Sluigi k->nr_hwcur >= k->nkr_num_slots || 1125260368Sluigi k->nr_tail >= k->nkr_num_slots || 1126260368Sluigi busy < 0 || 1127260368Sluigi busy >= k->nkr_num_slots) { 1128260368Sluigi D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 1129260368Sluigi k->nkr_lease_idx, k->nkr_num_slots); 1130260368Sluigi } 1131260368Sluigi#endif 1132260368Sluigi return 
space; 1133260368Sluigi} 1134260368Sluigi 1135260368Sluigi 1136260368Sluigi 1137260368Sluigi 1138260368Sluigi/* make a lease on the kring for N positions. return the 1139260368Sluigi * lease index 1140260368Sluigi * XXX only used in VALE code and with is_rx = 1 1141260368Sluigi */ 1142260368Sluigistatic inline uint32_t 1143260368Sluiginm_kr_lease(struct netmap_kring *k, u_int n, int is_rx) 1144260368Sluigi{ 1145260368Sluigi uint32_t lim = k->nkr_num_slots - 1; 1146260368Sluigi uint32_t lease_idx = k->nkr_lease_idx; 1147260368Sluigi 1148260368Sluigi k->nkr_leases[lease_idx] = NR_NOSLOT; 1149260368Sluigi k->nkr_lease_idx = nm_next(lease_idx, lim); 1150260368Sluigi 1151260368Sluigi if (n > nm_kr_space(k, is_rx)) { 1152260368Sluigi D("invalid request for %d slots", n); 1153260368Sluigi panic("x"); 1154260368Sluigi } 1155260368Sluigi /* XXX verify that there are n slots */ 1156260368Sluigi k->nkr_hwlease += n; 1157260368Sluigi if (k->nkr_hwlease > lim) 1158260368Sluigi k->nkr_hwlease -= lim + 1; 1159260368Sluigi 1160260368Sluigi if (k->nkr_hwlease >= k->nkr_num_slots || 1161260368Sluigi k->nr_hwcur >= k->nkr_num_slots || 1162260368Sluigi k->nr_hwtail >= k->nkr_num_slots || 1163260368Sluigi k->nkr_lease_idx >= k->nkr_num_slots) { 1164260368Sluigi D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d", 1165260368Sluigi k->na->ifp->if_xname, 1166260368Sluigi k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 1167260368Sluigi k->nkr_lease_idx, k->nkr_num_slots); 1168260368Sluigi } 1169260368Sluigi return lease_idx; 1170260368Sluigi} 1171260368Sluigi 1172260368Sluigi/* 1173259412Sluigi * This flush routine supports only unicast and broadcast but a large 1174259412Sluigi * number of ports, and lets us replace the learn and dispatch functions. 
1175259412Sluigi */ 1176259412Sluigiint 1177259412Sluiginm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, 1178259412Sluigi u_int ring_nr) 1179259412Sluigi{ 1180259412Sluigi struct nm_bdg_q *dst_ents, *brddst; 1181259412Sluigi uint16_t num_dsts = 0, *dsts; 1182259412Sluigi struct nm_bridge *b = na->na_bdg; 1183259412Sluigi u_int i, j, me = na->bdg_port; 1184259412Sluigi 1185259412Sluigi /* 1186259412Sluigi * The work area (pointed by ft) is followed by an array of 1187259412Sluigi * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS 1188259412Sluigi * queues per port plus one for the broadcast traffic. 1189259412Sluigi * Then we have an array of destination indexes. 1190259412Sluigi */ 1191259412Sluigi dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); 1192259412Sluigi dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1); 1193259412Sluigi 1194259412Sluigi /* first pass: find a destination for each packet in the batch */ 1195259412Sluigi for (i = 0; likely(i < n); i += ft[i].ft_frags) { 1196259412Sluigi uint8_t dst_ring = ring_nr; /* default, same ring as origin */ 1197259412Sluigi uint16_t dst_port, d_i; 1198259412Sluigi struct nm_bdg_q *d; 1199259412Sluigi uint8_t *buf = ft[i].ft_buf; 1200259412Sluigi u_int len = ft[i].ft_len; 1201259412Sluigi 1202259412Sluigi ND("slot %d frags %d", i, ft[i].ft_frags); 1203261909Sluigi /* Drop the packet if the virtio-net header is not into the first 1204259412Sluigi fragment nor at the very beginning of the second. 
*/ 1205261909Sluigi if (unlikely(na->virt_hdr_len > len)) 1206259412Sluigi continue; 1207261909Sluigi if (len == na->virt_hdr_len) { 1208259412Sluigi buf = ft[i+1].ft_buf; 1209259412Sluigi len = ft[i+1].ft_len; 1210259412Sluigi } else { 1211261909Sluigi buf += na->virt_hdr_len; 1212261909Sluigi len -= na->virt_hdr_len; 1213259412Sluigi } 1214259412Sluigi dst_port = b->nm_bdg_lookup(buf, len, &dst_ring, na); 1215259412Sluigi if (netmap_verbose > 255) 1216259412Sluigi RD(5, "slot %d port %d -> %d", i, me, dst_port); 1217259412Sluigi if (dst_port == NM_BDG_NOPORT) 1218259412Sluigi continue; /* this packet is identified to be dropped */ 1219259412Sluigi else if (unlikely(dst_port > NM_BDG_MAXPORTS)) 1220259412Sluigi continue; 1221259412Sluigi else if (dst_port == NM_BDG_BROADCAST) 1222259412Sluigi dst_ring = 0; /* broadcasts always go to ring 0 */ 1223259412Sluigi else if (unlikely(dst_port == me || 1224259412Sluigi !b->bdg_ports[dst_port])) 1225259412Sluigi continue; 1226259412Sluigi 1227259412Sluigi /* get a position in the scratch pad */ 1228259412Sluigi d_i = dst_port * NM_BDG_MAXRINGS + dst_ring; 1229259412Sluigi d = dst_ents + d_i; 1230259412Sluigi 1231259412Sluigi /* append the first fragment to the list */ 1232259412Sluigi if (d->bq_head == NM_FT_NULL) { /* new destination */ 1233259412Sluigi d->bq_head = d->bq_tail = i; 1234259412Sluigi /* remember this position to be scanned later */ 1235259412Sluigi if (dst_port != NM_BDG_BROADCAST) 1236259412Sluigi dsts[num_dsts++] = d_i; 1237259412Sluigi } else { 1238259412Sluigi ft[d->bq_tail].ft_next = i; 1239259412Sluigi d->bq_tail = i; 1240259412Sluigi } 1241259412Sluigi d->bq_len += ft[i].ft_frags; 1242259412Sluigi } 1243259412Sluigi 1244259412Sluigi /* 1245259412Sluigi * Broadcast traffic goes to ring 0 on all destinations. 1246259412Sluigi * So we need to add these rings to the list of ports to scan. 1247259412Sluigi * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is 1248259412Sluigi * expensive. 
We should keep a compact list of active destinations 1249259412Sluigi * so we could shorten this loop. 1250259412Sluigi */ 1251259412Sluigi brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS; 1252259412Sluigi if (brddst->bq_head != NM_FT_NULL) { 1253259412Sluigi for (j = 0; likely(j < b->bdg_active_ports); j++) { 1254259412Sluigi uint16_t d_i; 1255259412Sluigi i = b->bdg_port_index[j]; 1256259412Sluigi if (unlikely(i == me)) 1257259412Sluigi continue; 1258259412Sluigi d_i = i * NM_BDG_MAXRINGS; 1259259412Sluigi if (dst_ents[d_i].bq_head == NM_FT_NULL) 1260259412Sluigi dsts[num_dsts++] = d_i; 1261259412Sluigi } 1262259412Sluigi } 1263259412Sluigi 1264259412Sluigi ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts); 1265259412Sluigi /* second pass: scan destinations (XXX will be modular somehow) */ 1266259412Sluigi for (i = 0; i < num_dsts; i++) { 1267259412Sluigi struct ifnet *dst_ifp; 1268259412Sluigi struct netmap_vp_adapter *dst_na; 1269259412Sluigi struct netmap_kring *kring; 1270259412Sluigi struct netmap_ring *ring; 1271261909Sluigi u_int dst_nr, lim, j, d_i, next, brd_next; 1272259412Sluigi u_int needed, howmany; 1273259412Sluigi int retry = netmap_txsync_retry; 1274259412Sluigi struct nm_bdg_q *d; 1275259412Sluigi uint32_t my_start = 0, lease_idx = 0; 1276259412Sluigi int nrings; 1277261909Sluigi int virt_hdr_mismatch = 0; 1278259412Sluigi 1279259412Sluigi d_i = dsts[i]; 1280259412Sluigi ND("second pass %d port %d", i, d_i); 1281259412Sluigi d = dst_ents + d_i; 1282259412Sluigi // XXX fix the division 1283259412Sluigi dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS]; 1284259412Sluigi /* protect from the lookup function returning an inactive 1285259412Sluigi * destination port 1286259412Sluigi */ 1287259412Sluigi if (unlikely(dst_na == NULL)) 1288259412Sluigi goto cleanup; 1289259412Sluigi if (dst_na->up.na_flags & NAF_SW_ONLY) 1290259412Sluigi goto cleanup; 1291259412Sluigi dst_ifp = dst_na->up.ifp; 1292259412Sluigi /* 1293259412Sluigi * The interface may be in 
!netmap mode in two cases: 1294259412Sluigi * - when na is attached but not activated yet; 1295259412Sluigi * - when na is being deactivated but is still attached. 1296259412Sluigi */ 1297259412Sluigi if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) { 1298259412Sluigi ND("not in netmap mode!"); 1299259412Sluigi goto cleanup; 1300259412Sluigi } 1301259412Sluigi 1302259412Sluigi /* there is at least one either unicast or broadcast packet */ 1303259412Sluigi brd_next = brddst->bq_head; 1304259412Sluigi next = d->bq_head; 1305259412Sluigi /* we need to reserve this many slots. If fewer are 1306259412Sluigi * available, some packets will be dropped. 1307259412Sluigi * Packets may have multiple fragments, so we may not use 1308259412Sluigi * there is a chance that we may not use all of the slots 1309259412Sluigi * we have claimed, so we will need to handle the leftover 1310259412Sluigi * ones when we regain the lock. 1311259412Sluigi */ 1312259412Sluigi needed = d->bq_len + brddst->bq_len; 1313259412Sluigi 1314261909Sluigi if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) { 1315261909Sluigi /* There is a virtio-net header/offloadings mismatch between 1316261909Sluigi * source and destination. The slower mismatch datapath will 1317261909Sluigi * be used to cope with all the mismatches. 1318261909Sluigi */ 1319261909Sluigi virt_hdr_mismatch = 1; 1320261909Sluigi if (dst_na->mfs < na->mfs) { 1321261909Sluigi /* We may need to do segmentation offloadings, and so 1322261909Sluigi * we may need a number of destination slots greater 1323261909Sluigi * than the number of input slots ('needed'). 1324261909Sluigi * We look for the smallest integer 'x' which satisfies: 1325261909Sluigi * needed * na->mfs + x * H <= x * na->mfs 1326261909Sluigi * where 'H' is the length of the longest header that may 1327261909Sluigi * be replicated in the segmentation process (e.g. 
for 1328261909Sluigi * TCPv4 we must account for ethernet header, IP header 1329261909Sluigi * and TCPv4 header). 1330261909Sluigi */ 1331261909Sluigi needed = (needed * na->mfs) / 1332261909Sluigi (dst_na->mfs - WORST_CASE_GSO_HEADER) + 1; 1333261909Sluigi ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed); 1334261909Sluigi } 1335261909Sluigi } 1336261909Sluigi 1337259412Sluigi ND(5, "pass 2 dst %d is %x %s", 1338259412Sluigi i, d_i, is_vp ? "virtual" : "nic/host"); 1339259412Sluigi dst_nr = d_i & (NM_BDG_MAXRINGS-1); 1340259412Sluigi nrings = dst_na->up.num_rx_rings; 1341259412Sluigi if (dst_nr >= nrings) 1342259412Sluigi dst_nr = dst_nr % nrings; 1343259412Sluigi kring = &dst_na->up.rx_rings[dst_nr]; 1344259412Sluigi ring = kring->ring; 1345259412Sluigi lim = kring->nkr_num_slots - 1; 1346259412Sluigi 1347259412Sluigiretry: 1348259412Sluigi 1349261909Sluigi if (dst_na->retry && retry) { 1350261909Sluigi /* try to get some free slot from the previous run */ 1351261909Sluigi dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0); 1352261909Sluigi } 1353259412Sluigi /* reserve the buffers in the queue and an entry 1354259412Sluigi * to report completion, and drop lock. 1355259412Sluigi * XXX this might become a helper function. 
1356259412Sluigi */ 1357259412Sluigi mtx_lock(&kring->q_lock); 1358259412Sluigi if (kring->nkr_stopped) { 1359259412Sluigi mtx_unlock(&kring->q_lock); 1360259412Sluigi goto cleanup; 1361259412Sluigi } 1362259412Sluigi my_start = j = kring->nkr_hwlease; 1363259412Sluigi howmany = nm_kr_space(kring, 1); 1364259412Sluigi if (needed < howmany) 1365259412Sluigi howmany = needed; 1366259412Sluigi lease_idx = nm_kr_lease(kring, howmany, 1); 1367259412Sluigi mtx_unlock(&kring->q_lock); 1368259412Sluigi 1369259412Sluigi /* only retry if we need more than available slots */ 1370259412Sluigi if (retry && needed <= howmany) 1371259412Sluigi retry = 0; 1372259412Sluigi 1373259412Sluigi /* copy to the destination queue */ 1374259412Sluigi while (howmany > 0) { 1375259412Sluigi struct netmap_slot *slot; 1376259412Sluigi struct nm_bdg_fwd *ft_p, *ft_end; 1377259412Sluigi u_int cnt; 1378259412Sluigi 1379259412Sluigi /* find the queue from which we pick next packet. 1380259412Sluigi * NM_FT_NULL is always higher than valid indexes 1381259412Sluigi * so we never dereference it if the other list 1382259412Sluigi * has packets (and if both are empty we never 1383259412Sluigi * get here). 
1384259412Sluigi */ 1385259412Sluigi if (next < brd_next) { 1386259412Sluigi ft_p = ft + next; 1387259412Sluigi next = ft_p->ft_next; 1388259412Sluigi } else { /* insert broadcast */ 1389259412Sluigi ft_p = ft + brd_next; 1390259412Sluigi brd_next = ft_p->ft_next; 1391259412Sluigi } 1392259412Sluigi cnt = ft_p->ft_frags; // cnt > 0 1393259412Sluigi if (unlikely(cnt > howmany)) 1394259412Sluigi break; /* no more space */ 1395259412Sluigi if (netmap_verbose && cnt > 1) 1396259412Sluigi RD(5, "rx %d frags to %d", cnt, j); 1397259412Sluigi ft_end = ft_p + cnt; 1398261909Sluigi if (unlikely(virt_hdr_mismatch)) { 1399261909Sluigi bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany); 1400261909Sluigi } else { 1401261909Sluigi howmany -= cnt; 1402261909Sluigi do { 1403261909Sluigi char *dst, *src = ft_p->ft_buf; 1404261909Sluigi size_t copy_len = ft_p->ft_len, dst_len = copy_len; 1405259412Sluigi 1406261909Sluigi slot = &ring->slot[j]; 1407261909Sluigi dst = BDG_NMB(&dst_na->up, slot); 1408259412Sluigi 1409261909Sluigi ND("send [%d] %d(%d) bytes at %s:%d", 1410261909Sluigi i, (int)copy_len, (int)dst_len, 1411261909Sluigi NM_IFPNAME(dst_ifp), j); 1412261909Sluigi /* round to a multiple of 64 */ 1413261909Sluigi copy_len = (copy_len + 63) & ~63; 1414260368Sluigi 1415261909Sluigi if (ft_p->ft_flags & NS_INDIRECT) { 1416261909Sluigi if (copyin(src, dst, copy_len)) { 1417261909Sluigi // invalid user pointer, pretend len is 0 1418261909Sluigi dst_len = 0; 1419261909Sluigi } 1420261909Sluigi } else { 1421261909Sluigi //memcpy(dst, src, copy_len); 1422261909Sluigi pkt_copy(src, dst, (int)copy_len); 1423261909Sluigi } 1424261909Sluigi slot->len = dst_len; 1425261909Sluigi slot->flags = (cnt << 8)| NS_MOREFRAG; 1426261909Sluigi j = nm_next(j, lim); 1427261909Sluigi needed--; 1428261909Sluigi ft_p++; 1429261909Sluigi } while (ft_p != ft_end); 1430261909Sluigi slot->flags = (cnt << 8); /* clear flag on last entry */ 1431261909Sluigi } 1432259412Sluigi /* are we done ? 
*/ 1433259412Sluigi if (next == NM_FT_NULL && brd_next == NM_FT_NULL) 1434259412Sluigi break; 1435259412Sluigi } 1436259412Sluigi { 1437259412Sluigi /* current position */ 1438259412Sluigi uint32_t *p = kring->nkr_leases; /* shorthand */ 1439259412Sluigi uint32_t update_pos; 1440259412Sluigi int still_locked = 1; 1441259412Sluigi 1442259412Sluigi mtx_lock(&kring->q_lock); 1443259412Sluigi if (unlikely(howmany > 0)) { 1444259412Sluigi /* not used all bufs. If i am the last one 1445259412Sluigi * i can recover the slots, otherwise must 1446259412Sluigi * fill them with 0 to mark empty packets. 1447259412Sluigi */ 1448259412Sluigi ND("leftover %d bufs", howmany); 1449259412Sluigi if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) { 1450259412Sluigi /* yes i am the last one */ 1451259412Sluigi ND("roll back nkr_hwlease to %d", j); 1452259412Sluigi kring->nkr_hwlease = j; 1453259412Sluigi } else { 1454259412Sluigi while (howmany-- > 0) { 1455259412Sluigi ring->slot[j].len = 0; 1456259412Sluigi ring->slot[j].flags = 0; 1457259412Sluigi j = nm_next(j, lim); 1458259412Sluigi } 1459259412Sluigi } 1460259412Sluigi } 1461259412Sluigi p[lease_idx] = j; /* report I am done */ 1462259412Sluigi 1463260368Sluigi update_pos = kring->nr_hwtail; 1464259412Sluigi 1465259412Sluigi if (my_start == update_pos) { 1466259412Sluigi /* all slots before my_start have been reported, 1467259412Sluigi * so scan subsequent leases to see if other ranges 1468259412Sluigi * have been completed, and to a selwakeup or txsync. 1469259412Sluigi */ 1470259412Sluigi while (lease_idx != kring->nkr_lease_idx && 1471259412Sluigi p[lease_idx] != NR_NOSLOT) { 1472259412Sluigi j = p[lease_idx]; 1473259412Sluigi p[lease_idx] = NR_NOSLOT; 1474259412Sluigi lease_idx = nm_next(lease_idx, lim); 1475259412Sluigi } 1476259412Sluigi /* j is the new 'write' position. 
j != my_start 1477259412Sluigi * means there are new buffers to report 1478259412Sluigi */ 1479259412Sluigi if (likely(j != my_start)) { 1480260368Sluigi kring->nr_hwtail = j; 1481259412Sluigi still_locked = 0; 1482259412Sluigi mtx_unlock(&kring->q_lock); 1483261909Sluigi dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0); 1484259412Sluigi if (dst_na->retry && retry--) 1485259412Sluigi goto retry; 1486259412Sluigi } 1487259412Sluigi } 1488259412Sluigi if (still_locked) 1489259412Sluigi mtx_unlock(&kring->q_lock); 1490259412Sluigi } 1491259412Sluigicleanup: 1492259412Sluigi d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */ 1493259412Sluigi d->bq_len = 0; 1494259412Sluigi } 1495259412Sluigi brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */ 1496259412Sluigi brddst->bq_len = 0; 1497259412Sluigi return 0; 1498259412Sluigi}


/*
 * txsync for a VALE port.
 *
 * Hands the tx kring to nm_bdg_preflush() with kring->rcur as the
 * target position and records in the kring how far it got.
 * When the bridge_batch tunable is <= 0 (testing only) nothing is
 * handed over and the ring is treated as fully consumed.
 * Always returns 0.
 */
static int
netmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
	u_int const last_slot = kring->nkr_num_slots - 1;
	u_int const target = kring->rcur;
	u_int sent;

	if (bridge_batch > 0) {
		/* keep the tunable within the supported batch size */
		if (bridge_batch > NM_BDG_BATCH)
			bridge_batch = NM_BDG_BATCH;
		sent = nm_bdg_preflush(na, ring_nr, kring, target);
	} else {
		/* testing only: report everything as used */
		sent = target;
	}

	if (sent != target)
		D("early break at %d/ %d, tail %d", sent, target, kring->nr_hwtail);

	/* packets between 'sent' and 'target' are left unsent. */
	kring->nr_hwcur = sent;
	kring->nr_hwtail = nm_prev(sent, last_slot);
	nm_txsync_finalize(kring);

	if (netmap_verbose)
		D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
	return 0;
}


/*
 * Main dispatch routine for the bridge (nm_txsync entry point of a
 * VALE port): view the adapter as a netmap_vp_adapter and forward to
 * netmap_vp_txsync().
 * We already know that only one thread is running this, and
 * nm_bdg_preflush must be run without lock.
 */
static int
bdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	return netmap_vp_txsync((struct netmap_vp_adapter *)na, ring_nr, flags);
}

/*
 * rxsync body for a VALE port. No locking is done here.
 *
 * Returns the result of netmap_ring_reinit() when the head computed
 * by nm_rxsync_prologue() is past the last slot, 0 otherwise.
 */
static int
netmap_vp_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int const last_slot = kring->nkr_num_slots - 1;
	u_int const head = nm_rxsync_prologue(kring);
	u_int pos;

	if (head > last_slot) {
		D("ouch dangerous reset!!!");
		return netmap_ring_reinit(kring);
	}

	/*
	 * First part, import newly received packets:
	 * nothing to do, they are already in the kring.
	 */

	/* Second part, skip past packets that userspace has released. */
	pos = kring->nr_hwcur;
	if (pos != head) {
		/* consistency check, but nothing really important here */
		while (likely(pos != head)) {
			struct netmap_slot *slot = &ring->slot[pos];

			if (BDG_NMB(na, slot) == netmap_buffer_base) { /* bad buf */
				D("bad buffer index %d, ignore ?",
				    slot->buf_idx);
			}
			slot->flags &= ~NS_BUF_CHANGED;
			pos = nm_next(pos, last_slot);
		}
		kring->nr_hwcur = head;
	}

	/* tell userspace that there are new packets */
	nm_rxsync_finalize(kring);
	return 0;
}

/*
 * user process reading from a VALE switch.
 * Already protected against concurrent calls from userspace,
 * but we must acquire the queue's lock to protect against
 * writers on the same queue.
1592260368Sluigi */ 1593260368Sluigistatic int 1594260368Sluigibdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) 1595260368Sluigi{ 1596260368Sluigi struct netmap_kring *kring = &na->rx_rings[ring_nr]; 1597260368Sluigi int n; 1598260368Sluigi 1599260368Sluigi mtx_lock(&kring->q_lock); 1600260368Sluigi n = netmap_vp_rxsync(na, ring_nr, flags); 1601259412Sluigi mtx_unlock(&kring->q_lock); 1602259412Sluigi return n; 1603259412Sluigi} 1604259412Sluigi 1605260368Sluigi 1606259412Sluigistatic int 1607259412Sluigibdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp) 1608259412Sluigi{ 1609259412Sluigi struct netmap_vp_adapter *vpna; 1610259412Sluigi struct netmap_adapter *na; 1611259412Sluigi int error; 1612261909Sluigi u_int npipes = 0; 1613259412Sluigi 1614259412Sluigi vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO); 1615259412Sluigi if (vpna == NULL) 1616259412Sluigi return ENOMEM; 1617259412Sluigi 1618259412Sluigi na = &vpna->up; 1619259412Sluigi 1620259412Sluigi na->ifp = ifp; 1621259412Sluigi 1622259412Sluigi /* bound checking */ 1623259412Sluigi na->num_tx_rings = nmr->nr_tx_rings; 1624259412Sluigi nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 1625259412Sluigi nmr->nr_tx_rings = na->num_tx_rings; // write back 1626259412Sluigi na->num_rx_rings = nmr->nr_rx_rings; 1627259412Sluigi nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 1628259412Sluigi nmr->nr_rx_rings = na->num_rx_rings; // write back 1629259412Sluigi nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE, 1630259412Sluigi 1, NM_BDG_MAXSLOTS, NULL); 1631259412Sluigi na->num_tx_desc = nmr->nr_tx_slots; 1632259412Sluigi nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE, 1633259412Sluigi 1, NM_BDG_MAXSLOTS, NULL); 1634261909Sluigi /* validate number of pipes. We want at least 1, 1635261909Sluigi * but probably can do with some more. 
1636261909Sluigi * So let's use 2 as default (when 0 is supplied) 1637261909Sluigi */ 1638261909Sluigi npipes = nmr->nr_arg1; 1639261909Sluigi nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL); 1640261909Sluigi nmr->nr_arg1 = npipes; /* write back */ 1641261909Sluigi /* validate extra bufs */ 1642261909Sluigi nm_bound_var(&nmr->nr_arg3, 0, 0, 1643261909Sluigi 128*NM_BDG_MAXSLOTS, NULL); 1644259412Sluigi na->num_rx_desc = nmr->nr_rx_slots; 1645261909Sluigi vpna->virt_hdr_len = 0; 1646261909Sluigi vpna->mfs = 1514; 1647261909Sluigi /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero?? 1648261909Sluigi vpna->mfs = netmap_buf_size; */ 1649261909Sluigi if (netmap_verbose) 1650261909Sluigi D("max frame size %u", vpna->mfs); 1651259412Sluigi 1652259412Sluigi na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER; 1653259412Sluigi na->nm_txsync = bdg_netmap_txsync; 1654259412Sluigi na->nm_rxsync = bdg_netmap_rxsync; 1655259412Sluigi na->nm_register = bdg_netmap_reg; 1656259412Sluigi na->nm_dtor = netmap_adapter_vp_dtor; 1657259412Sluigi na->nm_krings_create = netmap_vp_krings_create; 1658259412Sluigi na->nm_krings_delete = netmap_vp_krings_delete; 1659259412Sluigi na->nm_mem = netmap_mem_private_new(NM_IFPNAME(na->ifp), 1660259412Sluigi na->num_tx_rings, na->num_tx_desc, 1661261909Sluigi na->num_rx_rings, na->num_rx_desc, 1662261909Sluigi nmr->nr_arg3, npipes, &error); 1663261909Sluigi if (na->nm_mem == NULL) 1664261909Sluigi goto err; 1665259412Sluigi /* other nmd fields are set in the common routine */ 1666259412Sluigi error = netmap_attach_common(na); 1667261909Sluigi if (error) 1668261909Sluigi goto err; 1669259412Sluigi return 0; 1670261909Sluigi 1671261909Sluigierr: 1672261909Sluigi if (na->nm_mem != NULL) 1673261909Sluigi netmap_mem_private_delete(na->nm_mem); 1674261909Sluigi free(vpna, M_DEVBUF); 1675261909Sluigi return error; 1676259412Sluigi} 1677259412Sluigi 1678260368Sluigi 1679259412Sluigistatic void 1680259412Sluiginetmap_bwrap_dtor(struct netmap_adapter 
*na) 1681259412Sluigi{ 1682259412Sluigi struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; 1683259412Sluigi struct netmap_adapter *hwna = bna->hwna; 1684259412Sluigi struct nm_bridge *b = bna->up.na_bdg, 1685259412Sluigi *bh = bna->host.na_bdg; 1686259412Sluigi struct ifnet *ifp = na->ifp; 1687259412Sluigi 1688259412Sluigi ND("na %p", na); 1689259412Sluigi 1690259412Sluigi if (b) { 1691259412Sluigi netmap_bdg_detach_common(b, bna->up.bdg_port, 1692259412Sluigi (bh ? bna->host.bdg_port : -1)); 1693259412Sluigi } 1694259412Sluigi 1695259412Sluigi hwna->na_private = NULL; 1696259412Sluigi netmap_adapter_put(hwna); 1697259412Sluigi 1698259412Sluigi bzero(ifp, sizeof(*ifp)); 1699259412Sluigi free(ifp, M_DEVBUF); 1700259412Sluigi na->ifp = NULL; 1701259412Sluigi 1702259412Sluigi} 1703259412Sluigi 1704260368Sluigi 1705259412Sluigi/* 1706260368Sluigi * Intr callback for NICs connected to a bridge. 1707260368Sluigi * Simply ignore tx interrupts (maybe we could try to recover space ?) 1708260368Sluigi * and pass received packets from nic to the bridge. 1709260368Sluigi * 1710259412Sluigi * XXX TODO check locking: this is called from the interrupt 1711259412Sluigi * handler so we should make sure that the interface is not 1712259412Sluigi * disconnected while passing down an interrupt. 1713259412Sluigi * 1714260368Sluigi * Note, no user process can access this NIC or the host stack. 1715260368Sluigi * The only part of the ring that is significant are the slots, 1716260368Sluigi * and head/cur/tail are set from the kring as needed 1717260368Sluigi * (part as a receive ring, part as a transmit ring). 1718260368Sluigi * 1719260368Sluigi * callback that overwrites the hwna notify callback. 1720259412Sluigi * Packets come from the outside or from the host stack and are put on an hwna rx ring. 1721259412Sluigi * The bridge wrapper then sends the packets through the bridge. 
1722259412Sluigi */ 1723259412Sluigistatic int 1724259412Sluiginetmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags) 1725259412Sluigi{ 1726259412Sluigi struct ifnet *ifp = na->ifp; 1727259412Sluigi struct netmap_bwrap_adapter *bna = na->na_private; 1728259412Sluigi struct netmap_vp_adapter *hostna = &bna->host; 1729259412Sluigi struct netmap_kring *kring, *bkring; 1730259412Sluigi struct netmap_ring *ring; 1731259412Sluigi int is_host_ring = ring_nr == na->num_rx_rings; 1732259412Sluigi struct netmap_vp_adapter *vpna = &bna->up; 1733259412Sluigi int error = 0; 1734259412Sluigi 1735260368Sluigi if (netmap_verbose) 1736260368Sluigi D("%s %s%d 0x%x", NM_IFPNAME(ifp), 1737260368Sluigi (tx == NR_TX ? "TX" : "RX"), ring_nr, flags); 1738259412Sluigi 1739259412Sluigi if (flags & NAF_DISABLE_NOTIFY) { 1740259412Sluigi kring = tx == NR_TX ? na->tx_rings : na->rx_rings; 1741259412Sluigi bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings; 1742260368Sluigi if (kring[ring_nr].nkr_stopped) 1743260368Sluigi netmap_disable_ring(&bkring[ring_nr]); 1744259412Sluigi else 1745260368Sluigi bkring[ring_nr].nkr_stopped = 0; 1746259412Sluigi return 0; 1747259412Sluigi } 1748259412Sluigi 1749259412Sluigi if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP)) 1750259412Sluigi return 0; 1751259412Sluigi 1752260368Sluigi /* we only care about receive interrupts */ 1753259412Sluigi if (tx == NR_TX) 1754259412Sluigi return 0; 1755259412Sluigi 1756259412Sluigi kring = &na->rx_rings[ring_nr]; 1757259412Sluigi ring = kring->ring; 1758259412Sluigi 1759259412Sluigi /* make sure the ring is not disabled */ 1760259412Sluigi if (nm_kr_tryget(kring)) 1761259412Sluigi return 0; 1762259412Sluigi 1763259412Sluigi if (is_host_ring && hostna->na_bdg == NULL) { 1764259412Sluigi error = bna->save_notify(na, ring_nr, tx, flags); 1765259412Sluigi goto put_out; 1766259412Sluigi } 1767259412Sluigi 1768260368Sluigi /* Here we expect ring->head = ring->cur = ring->tail 
1769260368Sluigi * because everything has been released from the previous round. 1770260368Sluigi * However the ring is shared and we might have info from 1771260368Sluigi * the wrong side (the tx ring). Hence we overwrite with 1772260368Sluigi * the info from the rx kring. 1773260368Sluigi */ 1774260368Sluigi if (netmap_verbose) 1775260368Sluigi D("%s head %d cur %d tail %d (kring %d %d %d)", NM_IFPNAME(ifp), 1776260368Sluigi ring->head, ring->cur, ring->tail, 1777260368Sluigi kring->rhead, kring->rcur, kring->rtail); 1778260368Sluigi 1779260368Sluigi ring->head = kring->rhead; 1780260368Sluigi ring->cur = kring->rcur; 1781260368Sluigi ring->tail = kring->rtail; 1782260368Sluigi 1783259412Sluigi if (is_host_ring) { 1784259412Sluigi vpna = hostna; 1785259412Sluigi ring_nr = 0; 1786267128Sluigi } 1787261909Sluigi /* simulate a user wakeup on the rx ring */ 1788261909Sluigi /* fetch packets that have arrived. 1789261909Sluigi * XXX maybe do this in a loop ? 1790261909Sluigi */ 1791261909Sluigi error = kring->nm_sync(kring, 0); 1792261909Sluigi if (error) 1793261909Sluigi goto put_out; 1794260368Sluigi if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) { 1795259412Sluigi D("how strange, interrupt with no packets on %s", 1796259412Sluigi NM_IFPNAME(ifp)); 1797259412Sluigi goto put_out; 1798259412Sluigi } 1799260368Sluigi 1800260368Sluigi /* new packets are ring->cur to ring->tail, and the bkring 1801260368Sluigi * had hwcur == ring->cur. So advance ring->cur to ring->tail 1802260368Sluigi * to push all packets out. 1803260368Sluigi */ 1804260368Sluigi ring->head = ring->cur = ring->tail; 1805260368Sluigi 1806260368Sluigi /* also set tail to what the bwrap expects */ 1807260368Sluigi bkring = &vpna->up.tx_rings[ring_nr]; 1808260368Sluigi ring->tail = bkring->nr_hwtail; // rtail too ? 1809260368Sluigi 1810260368Sluigi /* pass packets to the switch */ 1811260368Sluigi nm_txsync_prologue(bkring); // XXX error checking ? 
1812259412Sluigi netmap_vp_txsync(vpna, ring_nr, flags); 1813259412Sluigi 1814260368Sluigi /* mark all buffers as released on this ring */ 1815260368Sluigi ring->head = ring->cur = kring->nr_hwtail; 1816260368Sluigi ring->tail = kring->rtail; 1817260368Sluigi /* another call to actually release the buffers */ 1818260368Sluigi if (!is_host_ring) { 1819261909Sluigi error = kring->nm_sync(kring, 0); 1820260368Sluigi } else { 1821260368Sluigi /* mark all packets as released, as in the 1822260368Sluigi * second part of netmap_rxsync_from_host() 1823260368Sluigi */ 1824260368Sluigi kring->nr_hwcur = kring->nr_hwtail; 1825260368Sluigi nm_rxsync_finalize(kring); 1826260368Sluigi } 1827259412Sluigi 1828259412Sluigiput_out: 1829259412Sluigi nm_kr_put(kring); 1830259412Sluigi return error; 1831259412Sluigi} 1832259412Sluigi 1833260368Sluigi 1834259412Sluigistatic int 1835259412Sluiginetmap_bwrap_register(struct netmap_adapter *na, int onoff) 1836259412Sluigi{ 1837259412Sluigi struct netmap_bwrap_adapter *bna = 1838259412Sluigi (struct netmap_bwrap_adapter *)na; 1839259412Sluigi struct netmap_adapter *hwna = bna->hwna; 1840259412Sluigi struct netmap_vp_adapter *hostna = &bna->host; 1841259412Sluigi int error; 1842259412Sluigi 1843260368Sluigi ND("%s %s", NM_IFPNAME(na->ifp), onoff ? "on" : "off"); 1844259412Sluigi 1845259412Sluigi if (onoff) { 1846259412Sluigi int i; 1847259412Sluigi 1848259412Sluigi hwna->na_lut = na->na_lut; 1849259412Sluigi hwna->na_lut_objtotal = na->na_lut_objtotal; 1850259412Sluigi 1851259412Sluigi if (hostna->na_bdg) { 1852259412Sluigi hostna->up.na_lut = na->na_lut; 1853259412Sluigi hostna->up.na_lut_objtotal = na->na_lut_objtotal; 1854259412Sluigi } 1855259412Sluigi 1856260516Sluigi /* cross-link the netmap rings 1857260516Sluigi * The original number of rings comes from hwna, 1858260516Sluigi * rx rings on one side equals tx rings on the other. 
1859260516Sluigi */ 1860261909Sluigi for (i = 0; i < na->num_rx_rings + 1; i++) { 1861259412Sluigi hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots; 1862259412Sluigi hwna->tx_rings[i].ring = na->rx_rings[i].ring; 1863259412Sluigi } 1864261909Sluigi for (i = 0; i < na->num_tx_rings + 1; i++) { 1865259412Sluigi hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots; 1866259412Sluigi hwna->rx_rings[i].ring = na->tx_rings[i].ring; 1867259412Sluigi } 1868259412Sluigi } 1869259412Sluigi 1870259412Sluigi if (hwna->ifp) { 1871259412Sluigi error = hwna->nm_register(hwna, onoff); 1872259412Sluigi if (error) 1873259412Sluigi return error; 1874259412Sluigi } 1875259412Sluigi 1876259412Sluigi bdg_netmap_reg(na, onoff); 1877259412Sluigi 1878259412Sluigi if (onoff) { 1879259412Sluigi bna->save_notify = hwna->nm_notify; 1880259412Sluigi hwna->nm_notify = netmap_bwrap_intr_notify; 1881259412Sluigi } else { 1882259412Sluigi hwna->nm_notify = bna->save_notify; 1883259412Sluigi hwna->na_lut = NULL; 1884259412Sluigi hwna->na_lut_objtotal = 0; 1885259412Sluigi } 1886259412Sluigi 1887259412Sluigi return 0; 1888259412Sluigi} 1889259412Sluigi 1890260368Sluigi 1891259412Sluigistatic int 1892259412Sluiginetmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, 1893259412Sluigi u_int *rxr, u_int *rxd) 1894259412Sluigi{ 1895259412Sluigi struct netmap_bwrap_adapter *bna = 1896259412Sluigi (struct netmap_bwrap_adapter *)na; 1897259412Sluigi struct netmap_adapter *hwna = bna->hwna; 1898259412Sluigi 1899259412Sluigi /* forward the request */ 1900259412Sluigi netmap_update_config(hwna); 1901259412Sluigi /* swap the results */ 1902259412Sluigi *txr = hwna->num_rx_rings; 1903259412Sluigi *txd = hwna->num_rx_desc; 1904259412Sluigi *rxr = hwna->num_tx_rings; 1905259412Sluigi *rxd = hwna->num_rx_desc; 1906259412Sluigi 1907259412Sluigi return 0; 1908259412Sluigi} 1909259412Sluigi 1910260368Sluigi 1911259412Sluigistatic int 
1912259412Sluiginetmap_bwrap_krings_create(struct netmap_adapter *na) 1913259412Sluigi{ 1914259412Sluigi struct netmap_bwrap_adapter *bna = 1915259412Sluigi (struct netmap_bwrap_adapter *)na; 1916259412Sluigi struct netmap_adapter *hwna = bna->hwna; 1917259412Sluigi struct netmap_adapter *hostna = &bna->host.up; 1918259412Sluigi int error; 1919259412Sluigi 1920259412Sluigi ND("%s", NM_IFPNAME(na->ifp)); 1921259412Sluigi 1922259412Sluigi error = netmap_vp_krings_create(na); 1923259412Sluigi if (error) 1924259412Sluigi return error; 1925259412Sluigi 1926259412Sluigi error = hwna->nm_krings_create(hwna); 1927259412Sluigi if (error) { 1928259412Sluigi netmap_vp_krings_delete(na); 1929259412Sluigi return error; 1930259412Sluigi } 1931259412Sluigi 1932261909Sluigi if (na->na_flags & NAF_HOST_RINGS) { 1933261909Sluigi hostna->tx_rings = na->tx_rings + na->num_tx_rings; 1934261909Sluigi hostna->rx_rings = na->rx_rings + na->num_rx_rings; 1935261909Sluigi } 1936259412Sluigi 1937259412Sluigi return 0; 1938259412Sluigi} 1939259412Sluigi 1940260368Sluigi 1941259412Sluigistatic void 1942259412Sluiginetmap_bwrap_krings_delete(struct netmap_adapter *na) 1943259412Sluigi{ 1944259412Sluigi struct netmap_bwrap_adapter *bna = 1945259412Sluigi (struct netmap_bwrap_adapter *)na; 1946259412Sluigi struct netmap_adapter *hwna = bna->hwna; 1947259412Sluigi 1948259412Sluigi ND("%s", NM_IFPNAME(na->ifp)); 1949259412Sluigi 1950259412Sluigi hwna->nm_krings_delete(hwna); 1951259412Sluigi netmap_vp_krings_delete(na); 1952259412Sluigi} 1953259412Sluigi 1954260368Sluigi 1955259412Sluigi/* notify method for the bridge-->hwna direction */ 1956259412Sluigistatic int 1957259412Sluiginetmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags) 1958259412Sluigi{ 1959259412Sluigi struct netmap_bwrap_adapter *bna = 1960259412Sluigi (struct netmap_bwrap_adapter *)na; 1961259412Sluigi struct netmap_adapter *hwna = bna->hwna; 1962259412Sluigi struct netmap_kring *kring, *hw_kring; 
1963259412Sluigi struct netmap_ring *ring; 1964260368Sluigi u_int lim; 1965259412Sluigi int error = 0; 1966259412Sluigi 1967259412Sluigi if (tx == NR_TX) 1968260700Sluigi return EINVAL; 1969259412Sluigi 1970259412Sluigi kring = &na->rx_rings[ring_n]; 1971259412Sluigi hw_kring = &hwna->tx_rings[ring_n]; 1972259412Sluigi ring = kring->ring; 1973259412Sluigi lim = kring->nkr_num_slots - 1; 1974259412Sluigi 1975259412Sluigi if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP)) 1976259412Sluigi return 0; 1977261909Sluigi mtx_lock(&kring->q_lock); 1978260368Sluigi /* first step: simulate a user wakeup on the rx ring */ 1979260368Sluigi netmap_vp_rxsync(na, ring_n, flags); 1980260368Sluigi ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 1981259412Sluigi NM_IFPNAME(na->ifp), ring_n, 1982260368Sluigi kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 1983260368Sluigi ring->head, ring->cur, ring->tail, 1984260368Sluigi hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail); 1985260368Sluigi /* second step: the simulated user consumes all new packets */ 1986260368Sluigi ring->head = ring->cur = ring->tail; 1987260368Sluigi 1988260368Sluigi /* third step: the new packets are sent on the tx ring 1989260368Sluigi * (which is actually the same ring) 1990260368Sluigi */ 1991260368Sluigi /* set tail to what the hw expects */ 1992260368Sluigi ring->tail = hw_kring->rtail; 1993261909Sluigi nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ? 
1994261909Sluigi error = hw_kring->nm_sync(hw_kring, flags); 1995260368Sluigi 1996260368Sluigi /* fourth step: now we are back the rx ring */ 1997260368Sluigi /* claim ownership on all hw owned bufs */ 1998260368Sluigi ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */ 1999260368Sluigi ring->tail = kring->rtail; /* restore saved value of tail, for safety */ 2000260368Sluigi 2001260368Sluigi /* fifth step: the user goes to sleep again, causing another rxsync */ 2002260368Sluigi netmap_vp_rxsync(na, ring_n, flags); 2003260368Sluigi ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 2004259412Sluigi NM_IFPNAME(na->ifp), ring_n, 2005260368Sluigi kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 2006260368Sluigi ring->head, ring->cur, ring->tail, 2007260368Sluigi hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); 2008261909Sluigi mtx_unlock(&kring->q_lock); 2009259412Sluigi return error; 2010259412Sluigi} 2011259412Sluigi 2012260368Sluigi 2013259412Sluigistatic int 2014259412Sluiginetmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags) 2015259412Sluigi{ 2016259412Sluigi struct netmap_bwrap_adapter *bna = na->na_private; 2017259412Sluigi struct netmap_adapter *port_na = &bna->up.up; 2018259412Sluigi if (tx == NR_TX || ring_n != 0) 2019260700Sluigi return EINVAL; 2020259412Sluigi return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags); 2021259412Sluigi} 2022259412Sluigi 2023260368Sluigi 2024259412Sluigi/* attach a bridge wrapper to the 'real' device */ 2025259412Sluigistatic int 2026259412Sluiginetmap_bwrap_attach(struct ifnet *fake, struct ifnet *real) 2027259412Sluigi{ 2028259412Sluigi struct netmap_bwrap_adapter *bna; 2029259412Sluigi struct netmap_adapter *na; 2030259412Sluigi struct netmap_adapter *hwna = NA(real); 2031259412Sluigi struct netmap_adapter *hostna; 2032259412Sluigi int error; 2033259412Sluigi 2034259412Sluigi 2035259412Sluigi bna = 
malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO); 2036259412Sluigi if (bna == NULL) 2037259412Sluigi return ENOMEM; 2038259412Sluigi 2039259412Sluigi na = &bna->up.up; 2040259412Sluigi na->ifp = fake; 2041259412Sluigi /* fill the ring data for the bwrap adapter with rx/tx meanings 2042259412Sluigi * swapped. The real cross-linking will be done during register, 2043259412Sluigi * when all the krings will have been created. 2044259412Sluigi */ 2045259412Sluigi na->num_rx_rings = hwna->num_tx_rings; 2046259412Sluigi na->num_tx_rings = hwna->num_rx_rings; 2047259412Sluigi na->num_tx_desc = hwna->num_rx_desc; 2048259412Sluigi na->num_rx_desc = hwna->num_tx_desc; 2049259412Sluigi na->nm_dtor = netmap_bwrap_dtor; 2050259412Sluigi na->nm_register = netmap_bwrap_register; 2051259412Sluigi // na->nm_txsync = netmap_bwrap_txsync; 2052259412Sluigi // na->nm_rxsync = netmap_bwrap_rxsync; 2053259412Sluigi na->nm_config = netmap_bwrap_config; 2054259412Sluigi na->nm_krings_create = netmap_bwrap_krings_create; 2055259412Sluigi na->nm_krings_delete = netmap_bwrap_krings_delete; 2056259412Sluigi na->nm_notify = netmap_bwrap_notify; 2057259412Sluigi na->nm_mem = hwna->nm_mem; 2058259412Sluigi na->na_private = na; /* prevent NIOCREGIF */ 2059259412Sluigi bna->up.retry = 1; /* XXX maybe this should depend on the hwna */ 2060259412Sluigi 2061259412Sluigi bna->hwna = hwna; 2062259412Sluigi netmap_adapter_get(hwna); 2063259412Sluigi hwna->na_private = bna; /* weak reference */ 2064261909Sluigi 2065261909Sluigi if (hwna->na_flags & NAF_HOST_RINGS) { 2066261909Sluigi na->na_flags |= NAF_HOST_RINGS; 2067261909Sluigi hostna = &bna->host.up; 2068261909Sluigi hostna->ifp = hwna->ifp; 2069261909Sluigi hostna->num_tx_rings = 1; 2070261909Sluigi hostna->num_tx_desc = hwna->num_rx_desc; 2071261909Sluigi hostna->num_rx_rings = 1; 2072261909Sluigi hostna->num_rx_desc = hwna->num_tx_desc; 2073261909Sluigi // hostna->nm_txsync = netmap_bwrap_host_txsync; 2074261909Sluigi // hostna->nm_rxsync = 
netmap_bwrap_host_rxsync; 2075261909Sluigi hostna->nm_notify = netmap_bwrap_host_notify; 2076261909Sluigi hostna->nm_mem = na->nm_mem; 2077261909Sluigi hostna->na_private = bna; 2078261909Sluigi } 2079259412Sluigi 2080260368Sluigi ND("%s<->%s txr %d txd %d rxr %d rxd %d", 2081260368Sluigi fake->if_xname, real->if_xname, 2082259412Sluigi na->num_tx_rings, na->num_tx_desc, 2083259412Sluigi na->num_rx_rings, na->num_rx_desc); 2084259412Sluigi 2085259412Sluigi error = netmap_attach_common(na); 2086259412Sluigi if (error) { 2087259412Sluigi netmap_adapter_put(hwna); 2088259412Sluigi free(bna, M_DEVBUF); 2089259412Sluigi return error; 2090259412Sluigi } 2091259412Sluigi return 0; 2092259412Sluigi} 2093259412Sluigi 2094260368Sluigi 2095259412Sluigivoid 2096259412Sluiginetmap_init_bridges(void) 2097259412Sluigi{ 2098259412Sluigi int i; 2099259412Sluigi bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */ 2100259412Sluigi for (i = 0; i < NM_BRIDGES; i++) 2101259412Sluigi BDG_RWINIT(&nm_bridges[i]); 2102259412Sluigi} 2103259412Sluigi#endif /* WITH_VALE */ 2104