netmap_vale.c revision 260368
1/* 2 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 27/* 28 * This module implements the VALE switch for netmap 29 30--- VALE SWITCH --- 31 32NMG_LOCK() serializes all modifications to switches and ports. 33A switch cannot be deleted until all ports are gone. 34 35For each switch, an SX lock (RWlock on linux) protects 36deletion of ports. When configuring or deleting a new port, the 37lock is acquired in exclusive mode (after holding NMG_LOCK). 38When forwarding, the lock is acquired in shared mode (without NMG_LOCK). 39The lock is held throughout the entire forwarding cycle, 40during which the thread may incur in a page fault. 
41Hence it is important that sleepable shared locks are used. 42 43On the rx ring, the per-port lock is grabbed initially to reserve 44a number of slot in the ring, then the lock is released, 45packets are copied from source to destination, and then 46the lock is acquired again and the receive ring is updated. 47(A similar thing is done on the tx ring for NIC and host stack 48ports attached to the switch) 49 50 */ 51 52/* 53 * OS-specific code that is used only within this file. 54 * Other OS-specific code that must be accessed by drivers 55 * is present in netmap_kern.h 56 */ 57 58#if defined(__FreeBSD__) 59#include <sys/cdefs.h> /* prerequisite */ 60__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap_vale.c 260368 2014-01-06 12:53:15Z luigi $"); 61 62#include <sys/types.h> 63#include <sys/errno.h> 64#include <sys/param.h> /* defines used in kernel.h */ 65#include <sys/kernel.h> /* types used in module initialization */ 66#include <sys/conf.h> /* cdevsw struct, UID, GID */ 67#include <sys/sockio.h> 68#include <sys/socketvar.h> /* struct socket */ 69#include <sys/malloc.h> 70#include <sys/poll.h> 71#include <sys/rwlock.h> 72#include <sys/socket.h> /* sockaddrs */ 73#include <sys/selinfo.h> 74#include <sys/sysctl.h> 75#include <net/if.h> 76#include <net/if_var.h> 77#include <net/bpf.h> /* BIOCIMMEDIATE */ 78#include <machine/bus.h> /* bus_dmamap_* */ 79#include <sys/endian.h> 80#include <sys/refcount.h> 81 82 83#define BDG_RWLOCK_T struct rwlock // struct rwlock 84 85#define BDG_RWINIT(b) \ 86 rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS) 87#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock) 88#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock) 89#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock) 90#define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock) 91#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock) 92#define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock) 93 94 95#elif defined(linux) 96 97#include "bsd_glue.h" 98 99#elif defined(__APPLE__) 100 101#warning OSX support is only 
partial 102#include "osx_glue.h" 103 104#else 105 106#error Unsupported platform 107 108#endif /* unsupported */ 109 110/* 111 * common headers 112 */ 113 114#include <net/netmap.h> 115#include <dev/netmap/netmap_kern.h> 116#include <dev/netmap/netmap_mem2.h> 117 118#ifdef WITH_VALE 119 120/* 121 * system parameters (most of them in netmap_kern.h) 122 * NM_NAME prefix for switch port names, default "vale" 123 * NM_BDG_MAXPORTS number of ports 124 * NM_BRIDGES max number of switches in the system. 125 * XXX should become a sysctl or tunable 126 * 127 * Switch ports are named valeX:Y where X is the switch name and Y 128 * is the port. If Y matches a physical interface name, the port is 129 * connected to a physical device. 130 * 131 * Unlike physical interfaces, switch ports use their own memory region 132 * for rings and buffers. 133 * The virtual interfaces use per-queue lock instead of core lock. 134 * In the tx loop, we aggregate traffic in batches to make all operations 135 * faster. The batch size is bridge_batch. 136 */ 137#define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */ 138#define NM_BDG_MAXSLOTS 4096 /* XXX same as above */ 139#define NM_BRIDGE_RINGSIZE 1024 /* in the device */ 140#define NM_BDG_HASH 1024 /* forwarding table entries */ 141#define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */ 142#define NM_MULTISEG 64 /* max size of a chain of bufs */ 143/* actual size of the tables */ 144#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG) 145/* NM_FT_NULL terminates a list of slots in the ft */ 146#define NM_FT_NULL NM_BDG_BATCH_MAX 147#define NM_BRIDGES 8 /* number of bridges */ 148 149 150/* 151 * bridge_batch is set via sysctl to the max batch size to be 152 * used in the bridge. The actual value may be larger as the 153 * last packet in the block may overflow the size. 
 */
int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
SYSCTL_DECL(_dev_netmap);
/* expose the batch size as dev.netmap.bridge_batch (read/write) */
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");


/* forward declarations for functions defined later in this file */
static int bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp);
static int bdg_netmap_reg(struct netmap_adapter *na, int onoff);
static int netmap_bwrap_attach(struct ifnet *, struct ifnet *);
static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
int kern_netmap_regif(struct nmreq *nmr);

/*
 * Each transmit queue accumulates a batch of packets into
 * a structure before forwarding. Packets to the same
 * destination are put in a list using ft_next as a link field.
 * ft_frags and ft_next are valid only on the first fragment.
 */
struct nm_bdg_fwd {	/* forwarding entry for a bridge */
	void *ft_buf;		/* netmap or indirect buffer */
	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
	uint8_t _ft_port;	/* dst port (unused) */
	uint16_t ft_flags;	/* flags, e.g. indirect */
	uint16_t ft_len;	/* src fragment len */
	uint16_t ft_next;	/* next packet to same destination */
};

/*
 * For each output interface, nm_bdg_q is used to construct a list.
 * bq_len is the number of output buffers (we can have coalescing
 * during the copy).
 * bq_head/bq_tail are indexes into the nm_bdg_fwd array
 * (NM_FT_NULL terminates the list).
 */
struct nm_bdg_q {
	uint16_t bq_head;
	uint16_t bq_tail;
	uint32_t bq_len;	/* number of buffers */
};

/* XXX revise this */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;
};

/*
 * nm_bridge is a descriptor for a VALE switch.
 * Interfaces for a bridge are all in bdg_ports[].
 * The array has fixed size, an empty entry does not terminate
 * the search, but lookups only occur on attach/detach so we
 * don't mind if they are slow.
 *
 * The bridge is non blocking on the transmit ports: excess
 * packets are dropped if there is no room on the output port.
 *
 * bdg_lock protects accesses to the bdg_ports array.
 * This is a rw lock (or equivalent).
 */
struct nm_bridge {
	/* XXX what is the proper alignment/layout ? */
	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
	int		bdg_namelen;
	uint32_t	bdg_active_ports; /* 0 means free */
	char		bdg_basename[IFNAMSIZ];

	/* Indexes of active ports (up to active_ports)
	 * and all other remaining ports.
	 */
	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];

	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];


	/*
	 * The function to decide the destination port.
	 * It returns either of an index of the destination port,
	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
	 * forward this packet.  ring_nr is the source ring index, and the
	 * function may overwrite this value to forward this packet to a
	 * different ring index.
	 * This function must be set by netmap_bdgctl().
	 */
	bdg_lookup_fn_t nm_bdg_lookup;

	/* the forwarding table, MAC+ports.
	 * XXX should be changed to an argument to be passed to
	 * the lookup function, and allocated on attach
	 */
	struct nm_hash_ent ht[NM_BDG_HASH];
};


/*
 * XXX in principle nm_bridges could be created dynamically
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 */
struct nm_bridge nm_bridges[NM_BRIDGES];


/*
 * this is a slightly optimized copy routine which rounds
 * to multiple of 64 bytes and is often faster than dealing
 * with other odd sizes. We assume there is enough room
 * in the source and destination buffers.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
260 */ 261static inline void 262pkt_copy(void *_src, void *_dst, int l) 263{ 264 uint64_t *src = _src; 265 uint64_t *dst = _dst; 266 if (unlikely(l >= 1024)) { 267 memcpy(dst, src, l); 268 return; 269 } 270 for (; likely(l > 0); l-=64) { 271 *dst++ = *src++; 272 *dst++ = *src++; 273 *dst++ = *src++; 274 *dst++ = *src++; 275 *dst++ = *src++; 276 *dst++ = *src++; 277 *dst++ = *src++; 278 *dst++ = *src++; 279 } 280} 281 282 283/* 284 * locate a bridge among the existing ones. 285 * MUST BE CALLED WITH NMG_LOCK() 286 * 287 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME. 288 * We assume that this is called with a name of at least NM_NAME chars. 289 */ 290static struct nm_bridge * 291nm_find_bridge(const char *name, int create) 292{ 293 int i, l, namelen; 294 struct nm_bridge *b = NULL; 295 296 NMG_LOCK_ASSERT(); 297 298 namelen = strlen(NM_NAME); /* base length */ 299 l = name ? strlen(name) : 0; /* actual length */ 300 if (l < namelen) { 301 D("invalid bridge name %s", name ? 
name : NULL); 302 return NULL; 303 } 304 for (i = namelen + 1; i < l; i++) { 305 if (name[i] == ':') { 306 namelen = i; 307 break; 308 } 309 } 310 if (namelen >= IFNAMSIZ) 311 namelen = IFNAMSIZ; 312 ND("--- prefix is '%.*s' ---", namelen, name); 313 314 /* lookup the name, remember empty slot if there is one */ 315 for (i = 0; i < NM_BRIDGES; i++) { 316 struct nm_bridge *x = nm_bridges + i; 317 318 if (x->bdg_active_ports == 0) { 319 if (create && b == NULL) 320 b = x; /* record empty slot */ 321 } else if (x->bdg_namelen != namelen) { 322 continue; 323 } else if (strncmp(name, x->bdg_basename, namelen) == 0) { 324 ND("found '%.*s' at %d", namelen, name, i); 325 b = x; 326 break; 327 } 328 } 329 if (i == NM_BRIDGES && b) { /* name not found, can create entry */ 330 /* initialize the bridge */ 331 strncpy(b->bdg_basename, name, namelen); 332 ND("create new bridge %s with ports %d", b->bdg_basename, 333 b->bdg_active_ports); 334 b->bdg_namelen = namelen; 335 b->bdg_active_ports = 0; 336 for (i = 0; i < NM_BDG_MAXPORTS; i++) 337 b->bdg_port_index[i] = i; 338 /* set the default function */ 339 b->nm_bdg_lookup = netmap_bdg_learning; 340 /* reset the MAC address table */ 341 bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); 342 } 343 return b; 344} 345 346 347/* 348 * Free the forwarding tables for rings attached to switch ports. 349 */ 350static void 351nm_free_bdgfwd(struct netmap_adapter *na) 352{ 353 int nrings, i; 354 struct netmap_kring *kring; 355 356 NMG_LOCK_ASSERT(); 357 nrings = na->num_tx_rings; 358 kring = na->tx_rings; 359 for (i = 0; i < nrings; i++) { 360 if (kring[i].nkr_ft) { 361 free(kring[i].nkr_ft, M_DEVBUF); 362 kring[i].nkr_ft = NULL; /* protect from freeing twice */ 363 } 364 } 365} 366 367 368/* 369 * Allocate the forwarding tables for the rings attached to the bridge ports. 
 */
static int
nm_alloc_bdgfwd(struct netmap_adapter *na)
{
	int nrings, l, i, num_dstq;
	struct netmap_kring *kring;

	NMG_LOCK_ASSERT();
	/* all port:rings + broadcast */
	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
	/* one allocation holds the ft entries, the per-destination
	 * queues and an index array, laid out back to back.
	 */
	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
	l += sizeof(struct nm_bdg_q) * num_dstq;
	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;

	nrings = na->num_tx_rings + 1; /* tx rings + host ring */
	kring = na->tx_rings;
	for (i = 0; i < nrings; i++) {
		struct nm_bdg_fwd *ft;
		struct nm_bdg_q *dstq;
		int j;

		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!ft) {
			/* undo the allocations done so far */
			nm_free_bdgfwd(na);
			return ENOMEM;
		}
		/* the destination queues live right after the ft entries */
		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
		for (j = 0; j < num_dstq; j++) {
			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
			dstq[j].bq_len = 0;
		}
		kring[i].nkr_ft = ft;
	}
	return 0;
}


/*
 * Remove ports hw and (if >= 0) sw from bridge b.
 * The update is computed on a private copy of bdg_port_index and
 * published under BDG_WLOCK(), so forwarders holding the read lock
 * never see a half-updated array.
 */
static void
netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
{
	int s_hw = hw, s_sw = sw;
	int i, lim = b->bdg_active_ports;
	uint8_t tmp[NM_BDG_MAXPORTS];

	/*
	New algorithm:
	make a copy of bdg_port_index;
	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
	in the array of bdg_port_index, replacing them with
	entries from the bottom of the array;
	decrement bdg_active_ports;
	acquire BDG_WLOCK() and copy back the array.
	*/

	D("detach %d and %d (lim %d)", hw, sw, lim);
	/* make a copy of the list of active ports, update it,
	 * and then copy back within BDG_WLOCK().
	 */
	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
		if (hw >= 0 && tmp[i] == hw) {
			ND("detach hw %d at %d", hw, i);
			lim--; /* point to last active port */
			tmp[i] = tmp[lim]; /* swap with i */
			tmp[lim] = hw;	/* now this is inactive */
			hw = -1;	/* mark as found */
		} else if (sw >= 0 && tmp[i] == sw) {
			ND("detach sw %d at %d", sw, i);
			lim--;
			tmp[i] = tmp[lim];
			tmp[lim] = sw;
			sw = -1;
		} else {
			i++;
		}
	}
	if (hw >= 0 || sw >= 0) {
		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
	}

	BDG_WLOCK(b);
	b->bdg_ports[s_hw] = NULL;
	if (s_sw >= 0) {
		b->bdg_ports[s_sw] = NULL;
	}
	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
	b->bdg_active_ports = lim;
	BDG_WUNLOCK(b);

	ND("now %d active ports", lim);
	if (lim == 0) {
		ND("marking bridge %s as free", b->bdg_basename);
		/* a bridge with no active ports is free for reuse
		 * (see nm_find_bridge)
		 */
		b->nm_bdg_lookup = NULL;
	}
}


/*
 * Destructor for a virtual-port adapter: detach it from its bridge
 * (if any) and release the fake ifnet that was allocated for it.
 */
static void
netmap_adapter_vp_dtor(struct netmap_adapter *na)
{
	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
	struct nm_bridge *b = vpna->na_bdg;
	struct ifnet *ifp = na->ifp;

	ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount);

	if (b) {
		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
	}

	/* the ifnet was malloc'ed in netmap_get_bdg_na(); scrub and free */
	bzero(ifp, sizeof(*ifp));
	free(ifp, M_DEVBUF);
	na->ifp = NULL;
}


/* Try to get a reference to a netmap adapter attached to a VALE switch.
 * If the adapter is found (or is created), this function returns 0, a
 * non NULL pointer is returned into *na, and the caller holds a
 * reference to the adapter.
 * If an adapter is not found, then no reference is grabbed and the
 * function returns an error code, or 0 if there is just a VALE prefix
 * mismatch. Therefore the caller holds a reference when
 * (*na != NULL && return == 0).
 */
int
netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
{
	const char *name = nmr->nr_name;
	struct ifnet *ifp;
	int error = 0;
	struct netmap_adapter *ret;
	struct netmap_vp_adapter *vpna;
	struct nm_bridge *b;
	int i, j, cand = -1, cand2 = -1;
	int needed;

	*na = NULL;     /* default return value */

	/* first try to see if this is a bridge port. */
	NMG_LOCK_ASSERT();
	if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
		return 0;  /* no error, but no VALE prefix */
	}

	b = nm_find_bridge(name, create);
	if (b == NULL) {
		D("no bridges available for '%s'", name);
		return (ENXIO);
	}

	/* Now we are sure that name starts with the bridge's name,
	 * lookup the port in the bridge. We need to scan the entire
	 * list. It is not important to hold a WLOCK on the bridge
	 * during the search because NMG_LOCK already guarantees
	 * that there are no other possible writers.
	 */

	/* lookup in the local list of ports */
	for (j = 0; j < b->bdg_active_ports; j++) {
		i = b->bdg_port_index[j];
		vpna = b->bdg_ports[i];
		// KASSERT(na != NULL);
		ifp = vpna->up.ifp;
		/* XXX make sure the name only contains one : */
		if (!strcmp(NM_IFPNAME(ifp), name)) {
			/* found: grab a reference and return it */
			netmap_adapter_get(&vpna->up);
			ND("found existing if %s refs %d", name,
				vpna->na_bdg_refcount);
			*na = (struct netmap_adapter *)vpna;
			return 0;
		}
	}
	/* not found, should we create it? */
	if (!create)
		return ENXIO;
	/* yes we should, see if we have space to attach entries */
	needed = 2; /* in some cases we only need 1 */
	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
		D("bridge full %d, cannot create new port", b->bdg_active_ports);
		return EINVAL;
	}
	/* record the next two ports available, but do not allocate yet */
	cand = b->bdg_port_index[b->bdg_active_ports];
	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
	ND("+++ bridge %s port %s used %d avail %d %d",
		b->bdg_basename, name, b->bdg_active_ports, cand, cand2);

	/*
	 * try see if there is a matching NIC with this name
	 * (after the bridge's name)
	 */
	ifp = ifunit_ref(name + b->bdg_namelen + 1);
	if (!ifp) { /* this is a virtual port */
		if (nmr->nr_cmd) {
			/* nr_cmd must be 0 for a virtual port */
			return EINVAL;
		}

		/* create a struct ifnet for the new port.
		 * need M_NOWAIT as we are under nma_lock
		 */
		ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!ifp)
			return ENOMEM;

		strcpy(ifp->if_xname, name);
		/* bdg_netmap_attach creates a struct netmap_adapter */
		error = bdg_netmap_attach(nmr, ifp);
		if (error) {
			D("error %d", error);
			free(ifp, M_DEVBUF);
			return error;
		}
		ret = NA(ifp);
		cand2 = -1; /* only need one port */
	} else { /* this is a NIC */
		struct ifnet *fake_ifp;

		error = netmap_get_hw_na(ifp, &ret);
		if (error || ret == NULL)
			goto out;

		/* make sure the NIC is not already in use */
		if (NETMAP_OWNED_BY_ANY(ret)) {
			D("NIC %s busy, cannot attach to bridge",
				NM_IFPNAME(ifp));
			error = EINVAL;
			goto out;
		}
		/* create a fake interface */
		fake_ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!fake_ifp) {
			error = ENOMEM;
			goto out;
		}
		strcpy(fake_ifp->if_xname, name);
		error = netmap_bwrap_attach(fake_ifp, ifp);
		if (error) {
			free(fake_ifp, M_DEVBUF);
			goto out;
		}
		ret = NA(fake_ifp);
		/* the host stack port is only attached when requested */
		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
			cand2 = -1; /* only need one port */
		if_rele(ifp);
	}
	vpna = (struct netmap_vp_adapter *)ret;

	/* publish the new port(s) under the bridge write lock */
	BDG_WLOCK(b);
	vpna->bdg_port = cand;
	ND("NIC  %p to bridge port %d", vpna, cand);
	/* bind the port to the bridge (virtual ports are not active) */
	b->bdg_ports[cand] = vpna;
	vpna->na_bdg = b;
	b->bdg_active_ports++;
	if (cand2 >= 0) {
		struct netmap_vp_adapter *hostna = vpna + 1;
		/* also bind the host stack to the bridge */
		b->bdg_ports[cand2] = hostna;
		hostna->bdg_port = cand2;
		hostna->na_bdg = b;
		b->bdg_active_ports++;
		ND("host %p to bridge port %d", hostna, cand2);
	}
	ND("if %s refs %d", name, vpna->up.na_refcount);
	BDG_WUNLOCK(b);
	*na = ret;
	netmap_adapter_get(ret);
	return 0;

out:
	if_rele(ifp);

	return error;
}


/* Process NETMAP_BDG_ATTACH: bind a NIC (or host stack) to a VALE
 * switch and put it in netmap mode, keeping a kernel-owned priv.
 */
static int
nm_bdg_attach(struct nmreq *nmr)
{
	struct netmap_adapter *na;
	struct netmap_if *nifp;
	struct netmap_priv_d *npriv;
	struct netmap_bwrap_adapter *bna;
	int error;

	npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (npriv == NULL)
		return ENOMEM;
	NMG_LOCK();
	/* XXX probably netmap_get_bdg_na() */
	error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
	if (error) /* no device, or another bridge or user owns the device */
		goto unlock_exit;
	if (na == NULL) { /* VALE prefix missing */
		error = EINVAL;
		goto unlock_exit;
	}

	if (na->active_fds > 0) { /* already registered */
		error = EBUSY;
		goto unref_exit;
	}

	nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, &error);
	if (!nifp) {
		goto unref_exit;
	}

	/* the priv is owned by the bwrap adapter until detach */
	bna = (struct netmap_bwrap_adapter*)na;
	bna->na_kpriv = npriv;
	NMG_UNLOCK();
	ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp));
	return 0;

unref_exit:
	netmap_adapter_put(na);
unlock_exit:
	NMG_UNLOCK();
	bzero(npriv, sizeof(*npriv));
	free(npriv, M_DEVBUF);
	return error;
}


/* Process NETMAP_BDG_DETACH: unregister a port previously attached
 * with nm_bdg_attach() and release the kernel-owned priv.
 */
static int
nm_bdg_detach(struct nmreq *nmr)
{
	struct netmap_adapter *na;
	int error;
	struct netmap_bwrap_adapter *bna;
	int last_instance;

	NMG_LOCK();
	error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
	if (error) { /* no device, or another bridge or user owns the device */
		goto unlock_exit;
	}
	if (na == NULL) { /* VALE prefix missing */
		error = EINVAL;
		goto unlock_exit;
	}

	bna = (struct netmap_bwrap_adapter *)na;

	if (na->active_fds == 0) { /* not registered */
		error = EINVAL;
		goto unref_exit;
	}

	last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */
	if (!last_instance) {
		D("--- error, trying to detach an entry with active mmaps");
		error = EINVAL;
	} else {
		struct netmap_priv_d *npriv = bna->na_kpriv;

		bna->na_kpriv = NULL;
		D("deleting priv");

		bzero(npriv, sizeof(*npriv));
		free(npriv, M_DEVBUF);
	}

unref_exit:
	netmap_adapter_put(na);
unlock_exit:
	NMG_UNLOCK();
	return error;

}


/* exported to kernel callers, e.g. OVS ?
 * Entry point.
 * Called without NMG_LOCK.
 */
int
netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
{
	struct nm_bridge *b;
	struct netmap_adapter *na;
	struct netmap_vp_adapter *vpna;
	struct ifnet *iter;
	char *name = nmr->nr_name;
	int cmd = nmr->nr_cmd, namelen = strlen(name);
	int error = 0, i, j;

	switch (cmd) {
	case NETMAP_BDG_ATTACH:
		error = nm_bdg_attach(nmr);
		break;

	case NETMAP_BDG_DETACH:
		error = nm_bdg_detach(nmr);
		break;

	case NETMAP_BDG_LIST:
		/* this is used to enumerate bridges and ports */
		if (namelen) { /* look up indexes of bridge and port */
			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
				error = EINVAL;
				break;
			}
			NMG_LOCK();
			b = nm_find_bridge(name, 0 /* don't create */);
			if (!b) {
				error = ENOENT;
				NMG_UNLOCK();
				break;
			}

			error = ENOENT;
			for (j = 0; j < b->bdg_active_ports; j++) {
				i = b->bdg_port_index[j];
				vpna = b->bdg_ports[i];
				if (vpna == NULL) {
					/* should not happen: active slot
					 * with no adapter
					 */
					D("---AAAAAAAAARGH-------");
					continue;
				}
				iter = vpna->up.ifp;
				/* the former and the latter identify a
				 * virtual port and a NIC, respectively
				 */
				if (!strcmp(iter->if_xname, name)) {
					/* bridge index */
					nmr->nr_arg1 = b - nm_bridges;
					nmr->nr_arg2 = i; /* port index */
					error = 0;
					break;
				}
			}
			NMG_UNLOCK();
		} else {
			/* return the first non-empty entry starting from
			 * bridge nr_arg1 and port nr_arg2.
			 *
			 * Users can detect the end of the same bridge by
			 * seeing the new and old value of nr_arg1, and can
			 * detect the end of all the bridge by error != 0
			 */
			i = nmr->nr_arg1;
			j = nmr->nr_arg2;

			NMG_LOCK();
			for (error = ENOENT; i < NM_BRIDGES; i++) {
				b = nm_bridges + i;
				if (j >= b->bdg_active_ports) {
					j = 0; /* following bridges scan from 0 */
					continue;
				}
				nmr->nr_arg1 = i;
				nmr->nr_arg2 = j;
				j = b->bdg_port_index[j];
				vpna = b->bdg_ports[j];
				iter = vpna->up.ifp;
				/* report the port name back to the caller */
				strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
				error = 0;
				break;
			}
			NMG_UNLOCK();
		}
		break;

	case NETMAP_BDG_LOOKUP_REG:
		/* register a lookup function to the given bridge.
		 * nmr->nr_name may be just bridge's name (including ':'
		 * if it is not just NM_NAME).
		 */
		if (!func) {
			error = EINVAL;
			break;
		}
		NMG_LOCK();
		b = nm_find_bridge(name, 0 /* don't create */);
		if (!b) {
			error = EINVAL;
		} else {
			b->nm_bdg_lookup = func;
		}
		NMG_UNLOCK();
		break;

	case NETMAP_BDG_OFFSET:
		/* set the per-port payload offset, clamped to the maximum */
		NMG_LOCK();
		error = netmap_get_bdg_na(nmr, &na, 0);
		if (na && !error) {
			vpna = (struct netmap_vp_adapter *)na;
			if (nmr->nr_arg1 > NETMAP_BDG_MAX_OFFSET)
				nmr->nr_arg1 = NETMAP_BDG_MAX_OFFSET;
			vpna->offset = nmr->nr_arg1;
			D("Using offset %d for %p", vpna->offset, vpna);
			netmap_adapter_put(na);
		}
		NMG_UNLOCK();
		break;

	default:
		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
		error = EINVAL;
		break;
	}
	return error;
}


/*
 * Create the krings for a VALE port, with extra tailroom for the
 * RX leases, then allocate the forwarding tables.
 */
static int
netmap_vp_krings_create(struct netmap_adapter *na)
{
	u_int ntx, nrx, tailroom;
	int error, i;
	uint32_t *leases;

	/* XXX vps do not need host rings,
	 * but we crash if we don't have one
	 */
	ntx = na->num_tx_rings + 1;
	nrx = na->num_rx_rings + 1;

	/*
	 * Leases are attached to RX rings on vale ports
	 */
	tailroom =
	    sizeof(uint32_t) * na->num_rx_desc * nrx;

	error = netmap_krings_create(na, ntx, nrx, tailroom);
	if (error)
		return error;

	/* carve the tailroom into per-ring lease arrays */
	leases = na->tailroom;

	for (i = 0; i < nrx; i++) {	/* Receive rings */
		na->rx_rings[i].nkr_leases = leases;
		leases += na->num_rx_desc;
	}

	error = nm_alloc_bdgfwd(na);
	if (error) {
		netmap_krings_delete(na);
		return error;
	}

	return 0;
}


/* Undo netmap_vp_krings_create(): free forwarding tables then krings. */
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);
	netmap_krings_delete(na);
}


static int
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
	struct netmap_vp_adapter *na, u_int ring_nr);


/*
 * Grab packets from a kring, move them into the ft structure
 * associated to the tx (input) port. Max one instance per port,
 * filtered on input (ioctl, poll or XXX).
 * Returns the next position in the ring.
 */
static int
nm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
	struct netmap_kring *kring, u_int end)
{
	struct netmap_ring *ring = kring->ring;
	struct nm_bdg_fwd *ft;
	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
	u_int ft_i = 0;	/* start from 0 */
	u_int frags = 1; /* how many frags ? */
	struct nm_bridge *b = na->na_bdg;

	/* To protect against modifications to the bridge we acquire a
	 * shared lock, waiting if we can sleep (if the source port is
	 * attached to a user process) or with a trylock otherwise (NICs).
	 */
	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
		BDG_RLOCK(b);
	else if (!BDG_RTRYLOCK(b))
		return 0; /* cannot lock without sleeping, give up */
	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
	ft = kring->nkr_ft;

	for (; likely(j != end); j = nm_next(j, lim)) {
		struct netmap_slot *slot = &ring->slot[j];
		char *buf;

		ft[ft_i].ft_len = slot->len;
		ft[ft_i].ft_flags = slot->flags;

		ND("flags is 0x%x", slot->flags);
		/* this slot goes into a list so initialize the link field */
		ft[ft_i].ft_next = NM_FT_NULL;
		/* indirect slots carry a user-supplied buffer pointer */
		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
			(void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
		__builtin_prefetch(buf);
		++ft_i;
		if (slot->flags & NS_MOREFRAG) {
			/* part of a multi-fragment packet: keep counting */
			frags++;
			continue;
		}
		if (unlikely(netmap_verbose && frags > 1))
			RD(5, "%d frags at %d", frags, ft_i - frags);
		/* the fragment count lives on the first fragment only */
		ft[ft_i - frags].ft_frags = frags;
		frags = 1;
		if (unlikely((int)ft_i >= bridge_batch))
			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
	}
	if (frags > 1) {
		/* the batch ended mid-packet: drop the MOREFRAG mark and
		 * forward what we have as a shorter chain
		 */
		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
		ft[ft_i - frags].ft_frags = frags - 1;
	}
	if (ft_i)
		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
	BDG_RUNLOCK(b);
	return j;
}


/* ----- FreeBSD if_bridge hash function ------- */

/*
 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
 *
 * http://www.burtleburtle.net/bob/hash/spooky.html
 */
#define mix(a, b, c)                                                    \
do {                                                                    \
	a -= b; a -= c; a ^= (c >> 13);                                 \
	b -= c; b -= a; b ^= (a << 8);                                  \
	c -= a; c -= b; c ^= (b >> 13);                                 \
	a -= b; a -= c; a ^= (c >> 12);                                 \
	b -= c; b -= a; b ^= (a << 16);                                 \
	c -= a; c -= b; c ^= (b >> 5);                                  \
	a -= b; a -= c; a ^= (c >> 3);                                  \
	b -= c; b -= a; b ^= (a << 10);                                 \
	c -= a; c -= b; c ^= (b >> 15);                                 \
} while (/*CONSTCOND*/0)


/* Hash a 6-byte MAC address into the forwarding table index space. */
static __inline uint32_t
nm_bridge_rthash(const uint8_t *addr)
{
	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key

	b += addr[5] << 8;
	b += addr[4];
	a += addr[3] << 24;
	a += addr[2] << 16;
	a += addr[1] << 8;
	a += addr[0];

	mix(a, b, c);
#define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
	return (c & BRIDGE_RTHASH_MASK);
}

#undef mix


/* nm_register callback for VALE virtual ports. */
static int
bdg_netmap_reg(struct netmap_adapter *na, int onoff)
{
	struct netmap_vp_adapter *vpna =
		(struct netmap_vp_adapter*)na;
	struct ifnet *ifp = na->ifp;

	/* the interface is already attached to the bridge,
	 * so we only need to toggle IFCAP_NETMAP.
	 */
	BDG_WLOCK(vpna->na_bdg);
	if (onoff) {
		ifp->if_capenable |= IFCAP_NETMAP;
	} else {
		ifp->if_capenable &= ~IFCAP_NETMAP;
	}
	BDG_WUNLOCK(vpna->na_bdg);
	return 0;
}


/*
 * Lookup function for a learning bridge.
 * Update the hash table with the source address,
 * and then returns the destination port index, and the
 * ring in *dst_ring (at the moment, always use ring 0)
 */
u_int
netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
		struct netmap_vp_adapter *na)
{
	struct nm_hash_ent *ht = na->na_bdg->ht;
	uint32_t sh, dh;
	u_int dst, mysrc = na->bdg_port;
	uint64_t smac, dmac;

	if (buf_len < 14) { /* ethernet header is 14 bytes */
		D("invalid buf length %d", buf_len);
		return NM_BDG_NOPORT;
	}
	/* extract dst and src MACs with two overlapping 8-byte loads;
	 * NOTE(review): unaligned, type-punned reads -- presumably fine
	 * on the supported platforms, but confirm before porting.
	 */
	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
	smac = le64toh(*(uint64_t *)(buf + 4));
	smac >>= 16;

	/*
	 * The hash is somewhat expensive, there might be some
	 * worthwhile optimizations here.
	 */
	if ((buf[6] & 1) == 0) { /* valid src (not multicast) */
		uint8_t *s = buf+6;
		sh = nm_bridge_rthash(s); // XXX hash of source
		/* update source port forwarding entry */
		ht[sh].mac = smac;	/* XXX expire ? */
		ht[sh].ports = mysrc;
		if (netmap_verbose)
		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
	}
	dst = NM_BDG_BROADCAST;
	if ((buf[0] & 1) == 0) { /* unicast */
		dh = nm_bridge_rthash(buf); // XXX hash of dst
		if (ht[dh].mac == dmac) {	/* found dst */
			dst = ht[dh].ports;
		}
		/* XXX otherwise return NM_BDG_UNKNOWN ? */
	}
	*dst_ring = 0;
	return dst;
}


/*
 * Available space in the ring.
Only used in VALE code
 * and only with is_rx = 1
 */
static inline uint32_t
nm_kr_space(struct netmap_kring *k, int is_rx)
{
	int space;

	if (is_rx) {
		/* slots between hwcur and hwlease are busy (leased or
		 * still owned by userspace); one slot is always kept
		 * empty to distinguish full from empty.
		 */
		int busy = k->nkr_hwlease - k->nr_hwcur;
		if (busy < 0)
			busy += k->nkr_num_slots;
		space = k->nkr_num_slots - 1 - busy;
	} else {
		/* XXX never used in this branch */
		space = k->nr_hwtail - k->nkr_hwlease;
		if (space < 0)
			space += k->nkr_num_slots;
	}
#if 0
	// sanity check
	if (k->nkr_hwlease >= k->nkr_num_slots ||
		k->nr_hwcur >= k->nkr_num_slots ||
		k->nr_tail >= k->nkr_num_slots ||
		busy < 0 ||
		busy >= k->nkr_num_slots) {
		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
			k->nkr_lease_idx, k->nkr_num_slots);
	}
#endif
	return space;
}




/* make a lease on the kring for N positions. return the
 * lease index
 * The caller must hold the kring's q_lock; the lease is reported
 * complete later by writing the final position into nkr_leases[].
 * XXX only used in VALE code and with is_rx = 1
 */
static inline uint32_t
nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
{
	uint32_t lim = k->nkr_num_slots - 1;
	uint32_t lease_idx = k->nkr_lease_idx;

	/* mark this lease as pending and advance the lease cursor */
	k->nkr_leases[lease_idx] = NR_NOSLOT;
	k->nkr_lease_idx = nm_next(lease_idx, lim);

	if (n > nm_kr_space(k, is_rx)) {
		D("invalid request for %d slots", n);
		panic("x");
	}
	/* XXX verify that there are n slots */
	k->nkr_hwlease += n;
	if (k->nkr_hwlease > lim)
		k->nkr_hwlease -= lim + 1;	/* wrap around */

	/* post-condition sanity check: all ring indexes must be in range */
	if (k->nkr_hwlease >= k->nkr_num_slots ||
		k->nr_hwcur >= k->nkr_num_slots ||
		k->nr_hwtail >= k->nkr_num_slots ||
		k->nkr_lease_idx >= k->nkr_num_slots) {
		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
			k->na->ifp->if_xname,
			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
			k->nkr_lease_idx, k->nkr_num_slots);
	}
	return lease_idx;
}

/*
 * This flush
routine supports only unicast and broadcast but a large
 * number of ports, and lets us replace the learn and dispatch functions.
 */
int
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
		u_int ring_nr)
{
	struct nm_bdg_q *dst_ents, *brddst;
	uint16_t num_dsts = 0, *dsts;
	struct nm_bridge *b = na->na_bdg;
	u_int i, j, me = na->bdg_port;

	/*
	 * The work area (pointed by ft) is followed by an array of
	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
	 * queues per port plus one for the broadcast traffic.
	 * Then we have an array of destination indexes.
	 */
	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);

	/* first pass: find a destination for each packet in the batch */
	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
		uint16_t dst_port, d_i;
		struct nm_bdg_q *d;
		uint8_t *buf = ft[i].ft_buf;
		u_int len = ft[i].ft_len;

		ND("slot %d frags %d", i, ft[i].ft_frags);
		/* Drop the packet if the offset is not into the first
		   fragment nor at the very beginning of the second.
		 */
		if (unlikely(na->offset > len))
			continue;
		if (len == na->offset) {
			/* offset ends exactly at the first fragment:
			 * the lookup uses the second fragment.
			 */
			buf = ft[i+1].ft_buf;
			len = ft[i+1].ft_len;
		} else {
			buf += na->offset;
			len -= na->offset;
		}
		dst_port = b->nm_bdg_lookup(buf, len, &dst_ring, na);
		if (netmap_verbose > 255)
			RD(5, "slot %d port %d -> %d", i, me, dst_port);
		if (dst_port == NM_BDG_NOPORT)
			continue; /* this packet is identified to be dropped */
		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
			continue;
		else if (dst_port == NM_BDG_BROADCAST)
			dst_ring = 0; /* broadcasts always go to ring 0 */
		else if (unlikely(dst_port == me ||
		    !b->bdg_ports[dst_port]))
			continue;	/* no self-forward, no dead port */

		/* get a position in the scratch pad */
		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
		d = dst_ents + d_i;

		/* append the first fragment to the list */
		if (d->bq_head == NM_FT_NULL) { /* new destination */
			d->bq_head = d->bq_tail = i;
			/* remember this position to be scanned later */
			if (dst_port != NM_BDG_BROADCAST)
				dsts[num_dsts++] = d_i;
		} else {
			ft[d->bq_tail].ft_next = i;
			d->bq_tail = i;
		}
		d->bq_len += ft[i].ft_frags;
	}

	/*
	 * Broadcast traffic goes to ring 0 on all destinations.
	 * So we need to add these rings to the list of ports to scan.
	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
	 * expensive. We should keep a compact list of active destinations
	 * so we could shorten this loop.
	 */
	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
	if (brddst->bq_head != NM_FT_NULL) {
		for (j = 0; likely(j < b->bdg_active_ports); j++) {
			uint16_t d_i;
			i = b->bdg_port_index[j];
			if (unlikely(i == me))
				continue;
			d_i = i * NM_BDG_MAXRINGS;
			/* only add ports not already queued by pass 1 */
			if (dst_ents[d_i].bq_head == NM_FT_NULL)
				dsts[num_dsts++] = d_i;
		}
	}

	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
	/* second pass: scan destinations (XXX will be modular somehow) */
	for (i = 0; i < num_dsts; i++) {
		struct ifnet *dst_ifp;
		struct netmap_vp_adapter *dst_na;
		struct netmap_kring *kring;
		struct netmap_ring *ring;
		u_int dst_nr, lim, j, sent = 0, d_i, next, brd_next;
		u_int needed, howmany;
		int retry = netmap_txsync_retry;
		struct nm_bdg_q *d;
		uint32_t my_start = 0, lease_idx = 0;
		int nrings;
		int offset_mismatch;

		d_i = dsts[i];
		ND("second pass %d port %d", i, d_i);
		d = dst_ents + d_i;
		// XXX fix the division
		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
		/* protect from the lookup function returning an inactive
		 * destination port
		 */
		if (unlikely(dst_na == NULL))
			goto cleanup;
		if (dst_na->up.na_flags & NAF_SW_ONLY)
			goto cleanup;
		dst_ifp = dst_na->up.ifp;
		/*
		 * The interface may be in !netmap mode in two cases:
		 * - when na is attached but not activated yet;
		 * - when na is being deactivated but is still attached.
		 */
		if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
			ND("not in netmap mode!");
			goto cleanup;
		}

		offset_mismatch = (dst_na->offset != na->offset);

		/* there is at least one either unicast or broadcast packet */
		brd_next = brddst->bq_head;
		next = d->bq_head;
		/* we need to reserve this many slots. If fewer are
		 * available, some packets will be dropped.
		 * Packets may have multiple fragments, so
		 * there is a chance that we may not use all of the slots
		 * we have claimed, so we will need to handle the leftover
		 * ones when we regain the lock.
		 */
		needed = d->bq_len + brddst->bq_len;

		/* NOTE(review): is_vp does not exist here; harmless only
		 * because ND() discards its arguments.
		 */
		ND(5, "pass 2 dst %d is %x %s",
			i, d_i, is_vp ? "virtual" : "nic/host");
		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
		nrings = dst_na->up.num_rx_rings;
		if (dst_nr >= nrings)
			dst_nr = dst_nr % nrings;
		kring = &dst_na->up.rx_rings[dst_nr];
		ring = kring->ring;
		lim = kring->nkr_num_slots - 1;

retry:

		/* reserve the buffers in the queue and an entry
		 * to report completion, and drop lock.
		 * XXX this might become a helper function.
		 */
		mtx_lock(&kring->q_lock);
		if (kring->nkr_stopped) {
			mtx_unlock(&kring->q_lock);
			goto cleanup;
		}
		if (dst_na->retry) {
			/* try to get the destination to free some space */
			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
		}
		my_start = j = kring->nkr_hwlease;
		howmany = nm_kr_space(kring, 1);
		if (needed < howmany)
			howmany = needed;
		lease_idx = nm_kr_lease(kring, howmany, 1);
		mtx_unlock(&kring->q_lock);

		/* only retry if we need more than available slots */
		if (retry && needed <= howmany)
			retry = 0;

		/* copy to the destination queue */
		while (howmany > 0) {
			struct netmap_slot *slot;
			struct nm_bdg_fwd *ft_p, *ft_end;
			u_int cnt;
			int fix_mismatch = offset_mismatch;

			/* find the queue from which we pick next packet.
			 * NM_FT_NULL is always higher than valid indexes
			 * so we never dereference it if the other list
			 * has packets (and if both are empty we never
			 * get here).
			 */
			if (next < brd_next) {
				ft_p = ft + next;
				next = ft_p->ft_next;
			} else { /* insert broadcast */
				ft_p = ft + brd_next;
				brd_next = ft_p->ft_next;
			}
			cnt = ft_p->ft_frags; // cnt > 0
			if (unlikely(cnt > howmany))
			    break; /* no more space */
			howmany -= cnt;
			if (netmap_verbose && cnt > 1)
				RD(5, "rx %d frags to %d", cnt, j);
			ft_end = ft_p + cnt;
			/* copy all fragments of this packet */
			do {
			    char *dst, *src = ft_p->ft_buf;
			    size_t copy_len = ft_p->ft_len, dst_len = copy_len;

			    slot = &ring->slot[j];
			    dst = BDG_NMB(&dst_na->up, slot);

			    if (unlikely(fix_mismatch)) {
				    /* We are processing the first fragment
				     * and there is a mismatch between source
				     * and destination offsets. Create a zeroed
				     * header for the destination, independently
				     * of the source header length and content.
				     */
				    src += na->offset;
				    copy_len -= na->offset;
				    bzero(dst, dst_na->offset);
				    dst += dst_na->offset;
				    dst_len = dst_na->offset + copy_len;
				    /* fix the first fragment only */
				    fix_mismatch = 0;
				    /* Here it could be copy_len == dst_len == 0,
				     * and so a zero length fragment is passed.
				     */
			    }

			    ND("send [%d] %d(%d) bytes at %s:%d",
				i, (int)copy_len, (int)dst_len,
				NM_IFPNAME(dst_ifp), j);
			    /* round to a multiple of 64 */
			    copy_len = (copy_len + 63) & ~63;

			    if (ft_p->ft_flags & NS_INDIRECT) {
				if (copyin(src, dst, copy_len)) {
					// invalid user pointer, pretend len is 0
					dst_len = 0;
				}
			    } else {
				//memcpy(dst, src, copy_len);
				pkt_copy(src, dst, (int)copy_len);
			    }
			    slot->len = dst_len;
			    slot->flags = (cnt << 8)| NS_MOREFRAG;
			    j = nm_next(j, lim);
			    ft_p++;
			    sent++;
			} while (ft_p != ft_end);
			slot->flags = (cnt << 8); /* clear flag on last entry */
			/* are we done ? */
			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
				break;
		}
		{
		    /* current position */
		    uint32_t *p = kring->nkr_leases; /* shorthand */
		    uint32_t update_pos;
		    int still_locked = 1;

		    mtx_lock(&kring->q_lock);
		    if (unlikely(howmany > 0)) {
			/* not used all bufs. If i am the last one
			 * i can recover the slots, otherwise must
			 * fill them with 0 to mark empty packets.
			 */
			ND("leftover %d bufs", howmany);
			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
			    /* yes i am the last one */
			    ND("roll back nkr_hwlease to %d", j);
			    kring->nkr_hwlease = j;
			} else {
			    while (howmany-- > 0) {
				ring->slot[j].len = 0;
				ring->slot[j].flags = 0;
				j = nm_next(j, lim);
			    }
			}
		    }
		    p[lease_idx] = j; /* report I am done */

		    update_pos = kring->nr_hwtail;

		    if (my_start == update_pos) {
			/* all slots before my_start have been reported,
			 * so scan subsequent leases to see if other ranges
			 * have been completed, and to a selwakeup or txsync.
			 */
			while (lease_idx != kring->nkr_lease_idx &&
				p[lease_idx] != NR_NOSLOT) {
			    j = p[lease_idx];
			    p[lease_idx] = NR_NOSLOT;
			    lease_idx = nm_next(lease_idx, lim);
			}
			/* j is the new 'write' position. j != my_start
			 * means there are new buffers to report
			 */
			if (likely(j != my_start)) {
				kring->nr_hwtail = j;
				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
				still_locked = 0;
				mtx_unlock(&kring->q_lock);
				if (dst_na->retry && retry--)
					goto retry;
			}
		    }
		    if (still_locked)
			mtx_unlock(&kring->q_lock);
		}
cleanup:
		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
		d->bq_len = 0;
	}
	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
	brddst->bq_len = 0;
	return 0;
}


/*
 * txsync backend for a VALE port: push up to bridge_batch packets
 * from the tx ring into the switch via nm_bdg_preflush().
 */
static int
netmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
	u_int done;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const cur = kring->rcur;

	if (bridge_batch <= 0) { /* testing only */
		done = cur; // used all
		goto done;
	}
	if (bridge_batch > NM_BDG_BATCH)
		bridge_batch = NM_BDG_BATCH;

	done = nm_bdg_preflush(na, ring_nr, kring, cur);
done:
	if (done != cur)
		D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail);
	/*
	 * packets between 'done' and 'cur' are left unsent.
	 */
	kring->nr_hwcur = done;
	kring->nr_hwtail = nm_prev(done, lim);
	nm_txsync_finalize(kring);
	if (netmap_verbose)
		D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
	return 0;
}


/*
 * main dispatch routine for the bridge.
 * We already know that only one thread is running this.
 * we must run nm_bdg_preflush without lock.
 */
static int
bdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	/* thin wrapper: just recover the vp adapter and delegate */
	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
	return netmap_vp_txsync(vpna, ring_nr, flags);
}


/*
 * rxsync backend for a VALE port. Packets are placed directly in the
 * kring by the switch, so the first half is a no-op; here we only
 * return released slots to the pool and validate buffer indexes.
 */
static int
netmap_vp_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, lim = kring->nkr_num_slots - 1;
	u_int head = nm_rxsync_prologue(kring);
	int n;

	if (head > lim) {
		D("ouch dangerous reset!!!");
		n = netmap_ring_reinit(kring);
		goto done;
	}

	/* First part, import newly received packets. */
	/* actually nothing to do here, they are already in the kring */

	/* Second part, skip past packets that userspace has released. */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		/* consistency check, but nothing really important here */
		for (n = 0; likely(nm_i != head); n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			void *addr = BDG_NMB(na, slot);

			if (addr == netmap_buffer_base) { /* bad buf */
				D("bad buffer index %d, ignore ?",
					slot->buf_idx);
			}
			slot->flags &= ~NS_BUF_CHANGED;
			nm_i = nm_next(nm_i, lim);
		}
		kring->nr_hwcur = head;
	}

	/* tell userspace that there are new packets */
	nm_rxsync_finalize(kring);
	n = 0;
done:
	return n;
}

/*
 * user process reading from a VALE switch.
 * Already protected against concurrent calls from userspace,
 * but we must acquire the queue's lock to protect against
 * writers on the same queue.
1595 */ 1596static int 1597bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) 1598{ 1599 struct netmap_kring *kring = &na->rx_rings[ring_nr]; 1600 int n; 1601 1602 mtx_lock(&kring->q_lock); 1603 n = netmap_vp_rxsync(na, ring_nr, flags); 1604 mtx_unlock(&kring->q_lock); 1605 return n; 1606} 1607 1608 1609static int 1610bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp) 1611{ 1612 struct netmap_vp_adapter *vpna; 1613 struct netmap_adapter *na; 1614 int error; 1615 1616 vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO); 1617 if (vpna == NULL) 1618 return ENOMEM; 1619 1620 na = &vpna->up; 1621 1622 na->ifp = ifp; 1623 1624 /* bound checking */ 1625 na->num_tx_rings = nmr->nr_tx_rings; 1626 nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 1627 nmr->nr_tx_rings = na->num_tx_rings; // write back 1628 na->num_rx_rings = nmr->nr_rx_rings; 1629 nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 1630 nmr->nr_rx_rings = na->num_rx_rings; // write back 1631 nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE, 1632 1, NM_BDG_MAXSLOTS, NULL); 1633 na->num_tx_desc = nmr->nr_tx_slots; 1634 nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE, 1635 1, NM_BDG_MAXSLOTS, NULL); 1636 na->num_rx_desc = nmr->nr_rx_slots; 1637 vpna->offset = 0; 1638 1639 na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER; 1640 na->nm_txsync = bdg_netmap_txsync; 1641 na->nm_rxsync = bdg_netmap_rxsync; 1642 na->nm_register = bdg_netmap_reg; 1643 na->nm_dtor = netmap_adapter_vp_dtor; 1644 na->nm_krings_create = netmap_vp_krings_create; 1645 na->nm_krings_delete = netmap_vp_krings_delete; 1646 na->nm_mem = netmap_mem_private_new(NM_IFPNAME(na->ifp), 1647 na->num_tx_rings, na->num_tx_desc, 1648 na->num_rx_rings, na->num_rx_desc); 1649 /* other nmd fields are set in the common routine */ 1650 error = netmap_attach_common(na); 1651 if (error) { 1652 free(vpna, M_DEVBUF); 1653 return error; 1654 } 1655 return 0; 1656} 1657 1658 1659static void 
netmap_bwrap_dtor(struct netmap_adapter *na)
{
	/* Destructor for a bridge wrapper: detach both the main and the
	 * host port from the bridge (if attached), drop the reference
	 * on the wrapped hardware adapter and release the fake ifnet.
	 */
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct nm_bridge *b = bna->up.na_bdg,
		*bh = bna->host.na_bdg;
	struct ifnet *ifp = na->ifp;

	ND("na %p", na);

	if (b) {
		/* detach the host port too, if it is on a bridge */
		netmap_bdg_detach_common(b, bna->up.bdg_port,
			(bh ? bna->host.bdg_port : -1));
	}

	/* break the weak back-pointer before releasing our reference */
	hwna->na_private = NULL;
	netmap_adapter_put(hwna);

	/* the fake ifnet was allocated by us, reclaim it */
	bzero(ifp, sizeof(*ifp));
	free(ifp, M_DEVBUF);
	na->ifp = NULL;

}


/*
 * Intr callback for NICs connected to a bridge.
 * Simply ignore tx interrupts (maybe we could try to recover space ?)
 * and pass received packets from nic to the bridge.
 *
 * XXX TODO check locking: this is called from the interrupt
 * handler so we should make sure that the interface is not
 * disconnected while passing down an interrupt.
 *
 * Note, no user process can access this NIC or the host stack.
 * The only part of the ring that is significant are the slots,
 * and head/cur/tail are set from the kring as needed
 * (part as a receive ring, part as a transmit ring).
 *
 * callback that overwrites the hwna notify callback.
 * Packets come from the outside or from the host stack and are put on an hwna rx ring.
 * The bridge wrapper then sends the packets through the bridge.
 */
static int
netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
{
	struct ifnet *ifp = na->ifp;
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_vp_adapter *hostna = &bna->host;
	struct netmap_kring *kring, *bkring;
	struct netmap_ring *ring;
	/* the last rx ring index is reserved for the host stack */
	int is_host_ring = ring_nr == na->num_rx_rings;
	struct netmap_vp_adapter *vpna = &bna->up;
	int error = 0;

	if (netmap_verbose)
	    D("%s %s%d 0x%x", NM_IFPNAME(ifp),
		(tx == NR_TX ? "TX" : "RX"), ring_nr, flags);

	/* propagate ring enable/disable to the paired bwrap ring */
	if (flags & NAF_DISABLE_NOTIFY) {
		kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
		bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
		if (kring[ring_nr].nkr_stopped)
			netmap_disable_ring(&bkring[ring_nr]);
		else
			bkring[ring_nr].nkr_stopped = 0;
		return 0;
	}

	if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
		return 0;

	/* we only care about receive interrupts */
	if (tx == NR_TX)
		return 0;

	kring = &na->rx_rings[ring_nr];
	ring = kring->ring;

	/* make sure the ring is not disabled */
	if (nm_kr_tryget(kring))
		return 0;

	if (is_host_ring && hostna->na_bdg == NULL) {
		/* host port not bridged: fall back to the saved notify */
		error = bna->save_notify(na, ring_nr, tx, flags);
		goto put_out;
	}

	/* Here we expect ring->head = ring->cur = ring->tail
	 * because everything has been released from the previous round.
	 * However the ring is shared and we might have info from
	 * the wrong side (the tx ring). Hence we overwrite with
	 * the info from the rx kring.
	 */
	if (netmap_verbose)
	    D("%s head %d cur %d tail %d (kring %d %d %d)", NM_IFPNAME(ifp),
		ring->head, ring->cur, ring->tail,
		kring->rhead, kring->rcur, kring->rtail);

	ring->head = kring->rhead;
	ring->cur = kring->rcur;
	ring->tail = kring->rtail;

	/* simulate a user wakeup on the rx ring */
	if (is_host_ring) {
		netmap_rxsync_from_host(na, NULL, NULL);
		vpna = hostna;
		ring_nr = 0;
	} else {
		/* fetch packets that have arrived.
		 * XXX maybe do this in a loop ?
		 */
		error = na->nm_rxsync(na, ring_nr, 0);
		if (error)
			goto put_out;
	}
	if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
		D("how strange, interrupt with no packets on %s",
			NM_IFPNAME(ifp));
		goto put_out;
	}

	/* new packets are ring->cur to ring->tail, and the bkring
	 * had hwcur == ring->cur. So advance ring->cur to ring->tail
	 * to push all packets out.
	 */
	ring->head = ring->cur = ring->tail;

	/* also set tail to what the bwrap expects */
	bkring = &vpna->up.tx_rings[ring_nr];
	ring->tail = bkring->nr_hwtail; // rtail too ?

	/* pass packets to the switch */
	nm_txsync_prologue(bkring); // XXX error checking ?
	netmap_vp_txsync(vpna, ring_nr, flags);

	/* mark all buffers as released on this ring */
	ring->head = ring->cur = kring->nr_hwtail;
	ring->tail = kring->rtail;
	/* another call to actually release the buffers */
	if (!is_host_ring) {
		error = na->nm_rxsync(na, ring_nr, 0);
	} else {
		/* mark all packets as released, as in the
		 * second part of netmap_rxsync_from_host()
		 */
		kring->nr_hwcur = kring->nr_hwtail;
		nm_rxsync_finalize(kring);
	}

put_out:
	nm_kr_put(kring);
	return error;
}


/*
 * nm_register callback for a bridge wrapper: share the lookup table
 * with the hardware adapter (and the host port, if bridged),
 * cross-link the netmap rings, and install/restore the interrupt
 * notify callback on the hardware adapter.
 */
static int
netmap_bwrap_register(struct netmap_adapter *na, int onoff)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_vp_adapter *hostna = &bna->host;
	int error;

	ND("%s %s", NM_IFPNAME(na->ifp), onoff ? "on" : "off");

	if (onoff) {
		int i;

		hwna->na_lut = na->na_lut;
		hwna->na_lut_objtotal = na->na_lut_objtotal;

		if (hostna->na_bdg) {
			hostna->up.na_lut = na->na_lut;
			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
		}

		/* cross-link the netmap rings: our rx is the hwna tx and
		 * vice versa ('<=' includes the extra host ring)
		 */
		for (i = 0; i <= na->num_tx_rings; i++) {
			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
		}
		for (i = 0; i <= na->num_rx_rings; i++) {
			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
		}
	}

	if (hwna->ifp) {
		error = hwna->nm_register(hwna, onoff);
		if (error)
			return error;
	}

	bdg_netmap_reg(na, onoff);

	/* hook/unhook interrupts into the bridge */
	if (onoff) {
		bna->save_notify = hwna->nm_notify;
		hwna->nm_notify = netmap_bwrap_intr_notify;
	} else {
		hwna->nm_notify = bna->save_notify;
		hwna->na_lut = NULL;
		hwna->na_lut_objtotal = 0;
	}

	return 0;
}

1869 1870static int 1871netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, 1872 u_int *rxr, u_int *rxd) 1873{ 1874 struct netmap_bwrap_adapter *bna = 1875 (struct netmap_bwrap_adapter *)na; 1876 struct netmap_adapter *hwna = bna->hwna; 1877 1878 /* forward the request */ 1879 netmap_update_config(hwna); 1880 /* swap the results */ 1881 *txr = hwna->num_rx_rings; 1882 *txd = hwna->num_rx_desc; 1883 *rxr = hwna->num_tx_rings; 1884 *rxd = hwna->num_rx_desc; 1885 1886 return 0; 1887} 1888 1889 1890static int 1891netmap_bwrap_krings_create(struct netmap_adapter *na) 1892{ 1893 struct netmap_bwrap_adapter *bna = 1894 (struct netmap_bwrap_adapter *)na; 1895 struct netmap_adapter *hwna = bna->hwna; 1896 struct netmap_adapter *hostna = &bna->host.up; 1897 int error; 1898 1899 ND("%s", NM_IFPNAME(na->ifp)); 1900 1901 error = netmap_vp_krings_create(na); 1902 if (error) 1903 return error; 1904 1905 error = hwna->nm_krings_create(hwna); 1906 if (error) { 1907 netmap_vp_krings_delete(na); 1908 return error; 1909 } 1910 1911 hostna->tx_rings = na->tx_rings + na->num_tx_rings; 1912 hostna->rx_rings = na->rx_rings + na->num_rx_rings; 1913 1914 return 0; 1915} 1916 1917 1918static void 1919netmap_bwrap_krings_delete(struct netmap_adapter *na) 1920{ 1921 struct netmap_bwrap_adapter *bna = 1922 (struct netmap_bwrap_adapter *)na; 1923 struct netmap_adapter *hwna = bna->hwna; 1924 1925 ND("%s", NM_IFPNAME(na->ifp)); 1926 1927 hwna->nm_krings_delete(hwna); 1928 netmap_vp_krings_delete(na); 1929} 1930 1931 1932/* notify method for the bridge-->hwna direction */ 1933static int 1934netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags) 1935{ 1936 struct netmap_bwrap_adapter *bna = 1937 (struct netmap_bwrap_adapter *)na; 1938 struct netmap_adapter *hwna = bna->hwna; 1939 struct netmap_kring *kring, *hw_kring; 1940 struct netmap_ring *ring; 1941 u_int lim; 1942 int error = 0; 1943 1944 if (tx == NR_TX) 1945 return ENXIO; 1946 1947 kring = 
&na->rx_rings[ring_n]; 1948 hw_kring = &hwna->tx_rings[ring_n]; 1949 ring = kring->ring; 1950 lim = kring->nkr_num_slots - 1; 1951 1952 if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP)) 1953 return 0; 1954 /* first step: simulate a user wakeup on the rx ring */ 1955 netmap_vp_rxsync(na, ring_n, flags); 1956 ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 1957 NM_IFPNAME(na->ifp), ring_n, 1958 kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 1959 ring->head, ring->cur, ring->tail, 1960 hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail); 1961 /* second step: the simulated user consumes all new packets */ 1962 ring->head = ring->cur = ring->tail; 1963 1964 /* third step: the new packets are sent on the tx ring 1965 * (which is actually the same ring) 1966 */ 1967 /* set tail to what the hw expects */ 1968 ring->tail = hw_kring->rtail; 1969 if (ring_n == na->num_rx_rings) { 1970 netmap_txsync_to_host(hwna); 1971 } else { 1972 nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ? 
		error = hwna->nm_txsync(hwna, ring_n, flags);
	}

	/* fourth step: now we are back the rx ring */
	/* claim ownership on all hw owned bufs */
	ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */
	ring->tail = kring->rtail; /* restore saved value of tail, for safety */

	/* fifth step: the user goes to sleep again, causing another rxsync */
	netmap_vp_rxsync(na, ring_n, flags);
	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		NM_IFPNAME(na->ifp), ring_n,
		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
		ring->head, ring->cur, ring->tail,
		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);

	return error;
}


/*
 * Notify method for the host port of a bridge wrapper: forward to
 * netmap_bwrap_notify() on the extra (host) rx ring of the main port.
 * Only ring 0 in the RX direction is meaningful here.
 */
static int
netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
{
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_adapter *port_na = &bna->up.up;
	if (tx == NR_TX || ring_n != 0)
		return ENXIO;
	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
}


/* attach a bridge wrapper to the 'real' device */
static int
netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
{
	struct netmap_bwrap_adapter *bna;
	struct netmap_adapter *na;
	struct netmap_adapter *hwna = NA(real);	/* adapter of the wrapped NIC */
	struct netmap_adapter *hostna;
	int error;


	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (bna == NULL)
		return ENOMEM;

	na = &bna->up.up;
	na->ifp = fake;
	/* fill the ring data for the bwrap adapter with rx/tx meanings
	 * swapped. The real cross-linking will be done during register,
	 * when all the krings will have been created.
	 */
	na->num_rx_rings = hwna->num_tx_rings;
	na->num_tx_rings = hwna->num_rx_rings;
	na->num_tx_desc = hwna->num_rx_desc;
	na->num_rx_desc = hwna->num_tx_desc;
	na->nm_dtor = netmap_bwrap_dtor;
	na->nm_register = netmap_bwrap_register;
	// na->nm_txsync = netmap_bwrap_txsync;
	// na->nm_rxsync = netmap_bwrap_rxsync;
	na->nm_config = netmap_bwrap_config;
	na->nm_krings_create = netmap_bwrap_krings_create;
	na->nm_krings_delete = netmap_bwrap_krings_delete;
	na->nm_notify = netmap_bwrap_notify;
	na->nm_mem = hwna->nm_mem;	/* share the NIC's memory allocator */
	na->na_private = na; /* prevent NIOCREGIF */
	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */

	/* take a reference on the wrapped adapter for the bna lifetime */
	bna->hwna = hwna;
	netmap_adapter_get(hwna);
	hwna->na_private = bna; /* weak reference */

	/* set up the host port: one ring pair, sizes mirrored from hwna */
	hostna = &bna->host.up;
	hostna->ifp = hwna->ifp;
	hostna->num_tx_rings = 1;
	hostna->num_tx_desc = hwna->num_rx_desc;
	hostna->num_rx_rings = 1;
	hostna->num_rx_desc = hwna->num_tx_desc;
	// hostna->nm_txsync = netmap_bwrap_host_txsync;
	// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
	hostna->nm_notify = netmap_bwrap_host_notify;
	hostna->nm_mem = na->nm_mem;
	hostna->na_private = bna;

	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
		fake->if_xname, real->if_xname,
		na->num_tx_rings, na->num_tx_desc,
		na->num_rx_rings, na->num_rx_desc);

	error = netmap_attach_common(na);
	if (error) {
		/* undo the reference taken above and release bna */
		netmap_adapter_put(hwna);
		free(bna, M_DEVBUF);
		return error;
	}
	return 0;
}


/*
 * Zero and initialize the static array of VALE switches and their
 * per-bridge locks. Called once at module load.
 */
void
netmap_init_bridges(void)
{
	int i;
	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
	for (i = 0; i < NM_BRIDGES; i++)
		BDG_RWINIT(&nm_bridges[i]);
}
#endif /* WITH_VALE */