netmap.c (259538) | netmap.c (260368) |
---|---|
1/* | 1/* |
2 * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved. | 2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. |
3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the --- 9 unchanged lines hidden (view full) --- 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 27/* | 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the --- 9 unchanged lines hidden (view full) --- 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 27/* |
28 * $FreeBSD: head/sys/dev/netmap/netmap.c 259538 2013-12-18 04:36:35Z glebius $ | 28 * $FreeBSD: head/sys/dev/netmap/netmap.c 260368 2014-01-06 12:53:15Z luigi $ |
29 * 30 * This module supports memory mapped access to network devices, 31 * see netmap(4). 32 * 33 * The module uses a large, memory pool allocated by the kernel 34 * and accessible as mmapped memory by multiple userspace threads/processes. 35 * The memory pool contains packet buffers and "netmap rings", 36 * i.e. user-accessible copies of the interface's queues. --- 109 unchanged lines hidden (view full) --- 146#include <sys/selinfo.h> 147#include <sys/sysctl.h> 148#include <net/if.h> 149#include <net/if_var.h> 150#include <net/bpf.h> /* BIOCIMMEDIATE */ 151#include <machine/bus.h> /* bus_dmamap_* */ 152#include <sys/endian.h> 153#include <sys/refcount.h> | 29 * 30 * This module supports memory mapped access to network devices, 31 * see netmap(4). 32 * 33 * The module uses a large, memory pool allocated by the kernel 34 * and accessible as mmapped memory by multiple userspace threads/processes. 35 * The memory pool contains packet buffers and "netmap rings", 36 * i.e. user-accessible copies of the interface's queues. --- 109 unchanged lines hidden (view full) --- 146#include <sys/selinfo.h> 147#include <sys/sysctl.h> 148#include <net/if.h> 149#include <net/if_var.h> 150#include <net/bpf.h> /* BIOCIMMEDIATE */ 151#include <machine/bus.h> /* bus_dmamap_* */ 152#include <sys/endian.h> 153#include <sys/refcount.h> |
154#include <sys/jail.h> | |
155 156 157/* reduce conditional code */ 158#define init_waitqueue_head(x) // only needed in linux 159 160 161 162#elif defined(linux) --- 58 unchanged lines hidden (view full) --- 221/* 222 * netmap_admode selects the netmap mode to use. 223 * Invalid values are reset to NETMAP_ADMODE_BEST 224 */ 225enum { NETMAP_ADMODE_BEST = 0, /* use native, fallback to generic */ 226 NETMAP_ADMODE_NATIVE, /* either native or none */ 227 NETMAP_ADMODE_GENERIC, /* force generic */ 228 NETMAP_ADMODE_LAST }; | 154 155 156/* reduce conditional code */ 157#define init_waitqueue_head(x) // only needed in linux 158 159 160 161#elif defined(linux) --- 58 unchanged lines hidden (view full) --- 220/* 221 * netmap_admode selects the netmap mode to use. 222 * Invalid values are reset to NETMAP_ADMODE_BEST 223 */ 224enum { NETMAP_ADMODE_BEST = 0, /* use native, fallback to generic */ 225 NETMAP_ADMODE_NATIVE, /* either native or none */ 226 NETMAP_ADMODE_GENERIC, /* force generic */ 227 NETMAP_ADMODE_LAST }; |
229#define NETMAP_ADMODE_NATIVE 1 /* Force native netmap adapter. */ 230#define NETMAP_ADMODE_GENERIC 2 /* Force generic netmap adapter. */ 231#define NETMAP_ADMODE_BEST 0 /* Priority to native netmap adapter. */ | |
232static int netmap_admode = NETMAP_ADMODE_BEST; 233 234int netmap_generic_mit = 100*1000; /* Generic mitigation interval in nanoseconds. */ 235int netmap_generic_ringsize = 1024; /* Generic ringsize. */ 236 237SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , ""); 238SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , ""); 239SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, ""); --- 7 unchanged lines hidden (view full) --- 247static void 248nm_kr_get(struct netmap_kring *kr) 249{ 250 while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)) 251 tsleep(kr, 0, "NM_KR_GET", 4); 252} 253 254 | 228static int netmap_admode = NETMAP_ADMODE_BEST; 229 230int netmap_generic_mit = 100*1000; /* Generic mitigation interval in nanoseconds. */ 231int netmap_generic_ringsize = 1024; /* Generic ringsize. */ 232 233SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , ""); 234SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , ""); 235SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, ""); --- 7 unchanged lines hidden (view full) --- 243static void 244nm_kr_get(struct netmap_kring *kr) 245{ 246 while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)) 247 tsleep(kr, 0, "NM_KR_GET", 4); 248} 249 250 |
251/* 252 * mark the ring as stopped, and run through the locks 253 * to make sure other users get to see it. 254 */ |
|
255void 256netmap_disable_ring(struct netmap_kring *kr) 257{ 258 kr->nkr_stopped = 1; 259 nm_kr_get(kr); 260 mtx_lock(&kr->q_lock); 261 mtx_unlock(&kr->q_lock); 262 nm_kr_put(kr); --- 112 unchanged lines hidden (view full) --- 375 *o = '\0'; 376#undef P_HI 377#undef P_LO 378#undef P_C 379 return dst; 380} 381 382 | 255void 256netmap_disable_ring(struct netmap_kring *kr) 257{ 258 kr->nkr_stopped = 1; 259 nm_kr_get(kr); 260 mtx_lock(&kr->q_lock); 261 mtx_unlock(&kr->q_lock); 262 nm_kr_put(kr); --- 112 unchanged lines hidden (view full) --- 375 *o = '\0'; 376#undef P_HI 377#undef P_LO 378#undef P_C 379 return dst; 380} 381 382 |
383 | |
384/* 385 * Fetch configuration from the device, to cope with dynamic 386 * reconfigurations after loading the module. 387 */ 388int 389netmap_update_config(struct netmap_adapter *na) 390{ 391 struct ifnet *ifp = na->ifp; --- 35 unchanged lines hidden (view full) --- 427 428 429int 430netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tailroom) 431{ 432 u_int i, len, ndesc; 433 struct netmap_kring *kring; 434 | 383/* 384 * Fetch configuration from the device, to cope with dynamic 385 * reconfigurations after loading the module. 386 */ 387int 388netmap_update_config(struct netmap_adapter *na) 389{ 390 struct ifnet *ifp = na->ifp; --- 35 unchanged lines hidden (view full) --- 426 427 428int 429netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tailroom) 430{ 431 u_int i, len, ndesc; 432 struct netmap_kring *kring; 433 |
434 // XXX additional space for extra rings ? |
|
435 len = (ntx + nrx) * sizeof(struct netmap_kring) + tailroom; 436 437 na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO); 438 if (na->tx_rings == NULL) { 439 D("Cannot allocate krings"); 440 return ENOMEM; 441 } 442 na->rx_rings = na->tx_rings + ntx; 443 | 435 len = (ntx + nrx) * sizeof(struct netmap_kring) + tailroom; 436 437 na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO); 438 if (na->tx_rings == NULL) { 439 D("Cannot allocate krings"); 440 return ENOMEM; 441 } 442 na->rx_rings = na->tx_rings + ntx; 443 |
444 /* 445 * All fields in krings are 0 except the one initialized below. 446 * but better be explicit on important kring fields. 447 */ |
|
444 ndesc = na->num_tx_desc; 445 for (i = 0; i < ntx; i++) { /* Transmit rings */ 446 kring = &na->tx_rings[i]; 447 bzero(kring, sizeof(*kring)); 448 kring->na = na; | 448 ndesc = na->num_tx_desc; 449 for (i = 0; i < ntx; i++) { /* Transmit rings */ 450 kring = &na->tx_rings[i]; 451 bzero(kring, sizeof(*kring)); 452 kring->na = na; |
453 kring->ring_id = i; |
|
449 kring->nkr_num_slots = ndesc; 450 /* | 454 kring->nkr_num_slots = ndesc; 455 /* |
451 * IMPORTANT: 452 * Always keep one slot empty, so we can detect new 453 * transmissions comparing cur and nr_hwcur (they are 454 * the same only if there are no new transmissions). | 456 * IMPORTANT: Always keep one slot empty. |
455 */ | 457 */ |
456 kring->nr_hwavail = ndesc - 1; | 458 kring->rhead = kring->rcur = kring->nr_hwcur = 0; 459 kring->rtail = kring->nr_hwtail = ndesc - 1; 460 snprintf(kring->name, sizeof(kring->name) - 1, "%s TX%d", NM_IFPNAME(na->ifp), i); |
457 mtx_init(&kring->q_lock, "nm_txq_lock", NULL, MTX_DEF); 458 init_waitqueue_head(&kring->si); 459 } 460 461 ndesc = na->num_rx_desc; 462 for (i = 0; i < nrx; i++) { /* Receive rings */ 463 kring = &na->rx_rings[i]; 464 bzero(kring, sizeof(*kring)); 465 kring->na = na; | 461 mtx_init(&kring->q_lock, "nm_txq_lock", NULL, MTX_DEF); 462 init_waitqueue_head(&kring->si); 463 } 464 465 ndesc = na->num_rx_desc; 466 for (i = 0; i < nrx; i++) { /* Receive rings */ 467 kring = &na->rx_rings[i]; 468 bzero(kring, sizeof(*kring)); 469 kring->na = na; |
470 kring->ring_id = i; |
|
466 kring->nkr_num_slots = ndesc; | 471 kring->nkr_num_slots = ndesc; |
472 kring->rhead = kring->rcur = kring->nr_hwcur = 0; 473 kring->rtail = kring->nr_hwtail = 0; 474 snprintf(kring->name, sizeof(kring->name) - 1, "%s RX%d", NM_IFPNAME(na->ifp), i); |
|
467 mtx_init(&kring->q_lock, "nm_rxq_lock", NULL, MTX_DEF); 468 init_waitqueue_head(&kring->si); 469 } 470 init_waitqueue_head(&na->tx_si); 471 init_waitqueue_head(&na->rx_si); 472 473 na->tailroom = na->rx_rings + nrx; 474 475 return 0; | 475 mtx_init(&kring->q_lock, "nm_rxq_lock", NULL, MTX_DEF); 476 init_waitqueue_head(&kring->si); 477 } 478 init_waitqueue_head(&na->tx_si); 479 init_waitqueue_head(&na->rx_si); 480 481 na->tailroom = na->rx_rings + nrx; 482 483 return 0; |
476 | |
477} 478 479 | 484} 485 486 |
487/* XXX check boundaries */ |
|
480void 481netmap_krings_delete(struct netmap_adapter *na) 482{ 483 int i; 484 485 for (i = 0; i < na->num_tx_rings + 1; i++) { 486 mtx_destroy(&na->tx_rings[i].q_lock); 487 } 488 for (i = 0; i < na->num_rx_rings + 1; i++) { 489 mtx_destroy(&na->rx_rings[i].q_lock); 490 } 491 free(na->tx_rings, M_DEVBUF); 492 na->tx_rings = na->rx_rings = na->tailroom = NULL; 493} 494 495 | 488void 489netmap_krings_delete(struct netmap_adapter *na) 490{ 491 int i; 492 493 for (i = 0; i < na->num_tx_rings + 1; i++) { 494 mtx_destroy(&na->tx_rings[i].q_lock); 495 } 496 for (i = 0; i < na->num_rx_rings + 1; i++) { 497 mtx_destroy(&na->rx_rings[i].q_lock); 498 } 499 free(na->tx_rings, M_DEVBUF); 500 na->tx_rings = na->rx_rings = na->tailroom = NULL; 501} 502 503 |
504/* 505 * Destructor for NIC ports. They also have an mbuf queue 506 * on the rings connected to the host so we need to purge 507 * them first. 508 */ 509static void 510netmap_hw_krings_delete(struct netmap_adapter *na) 511{ 512 struct mbq *q = &na->rx_rings[na->num_rx_rings].rx_queue; 513 514 ND("destroy sw mbq with len %d", mbq_len(q)); 515 mbq_purge(q); 516 mbq_safe_destroy(q); 517 netmap_krings_delete(na); 518} 519 520 |
|
496static struct netmap_if* 497netmap_if_new(const char *ifname, struct netmap_adapter *na) 498{ 499 struct netmap_if *nifp; 500 501 if (netmap_update_config(na)) { 502 /* configuration mismatch, report and fail */ 503 return NULL; --- 212 unchanged lines hidden (view full) --- 716 * into mbufs and we are done. The host --> NIC side is slightly 717 * harder because there might not be room in the tx ring so it 718 * might take a while before releasing the buffer. 719 */ 720 721 722/* 723 * pass a chain of buffers to the host stack as coming from 'dst' | 521static struct netmap_if* 522netmap_if_new(const char *ifname, struct netmap_adapter *na) 523{ 524 struct netmap_if *nifp; 525 526 if (netmap_update_config(na)) { 527 /* configuration mismatch, report and fail */ 528 return NULL; --- 212 unchanged lines hidden (view full) --- 741 * into mbufs and we are done. The host --> NIC side is slightly 742 * harder because there might not be room in the tx ring so it 743 * might take a while before releasing the buffer. 744 */ 745 746 747/* 748 * pass a chain of buffers to the host stack as coming from 'dst' |
749 * We do not need to lock because the queue is private. |
|
724 */ 725static void 726netmap_send_up(struct ifnet *dst, struct mbq *q) 727{ 728 struct mbuf *m; 729 730 /* send packets up, outside the lock */ 731 while ((m = mbq_dequeue(q)) != NULL) { 732 if (netmap_verbose & NM_VERB_HOST) 733 D("sending up pkt %p size %d", m, MBUF_LEN(m)); 734 NM_SEND_UP(dst, m); 735 } 736 mbq_destroy(q); 737} 738 739 740/* 741 * put a copy of the buffers marked NS_FORWARD into an mbuf chain. | 750 */ 751static void 752netmap_send_up(struct ifnet *dst, struct mbq *q) 753{ 754 struct mbuf *m; 755 756 /* send packets up, outside the lock */ 757 while ((m = mbq_dequeue(q)) != NULL) { 758 if (netmap_verbose & NM_VERB_HOST) 759 D("sending up pkt %p size %d", m, MBUF_LEN(m)); 760 NM_SEND_UP(dst, m); 761 } 762 mbq_destroy(q); 763} 764 765 766/* 767 * put a copy of the buffers marked NS_FORWARD into an mbuf chain. |
742 * Run from hwcur to cur - reserved | 768 * Take packets from hwcur to ring->head marked NS_FORWARD (or forced) 769 * and pass them up. Drop remaining packets in the unlikely event 770 * of an mbuf shortage. |
743 */ 744static void 745netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force) 746{ | 771 */ 772static void 773netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force) 774{ |
747 /* Take packets from hwcur to cur-reserved and pass them up. 748 * In case of no buffers we give up. At the end of the loop, 749 * the queue is drained in all cases. 750 * XXX handle reserved 751 */ 752 u_int lim = kring->nkr_num_slots - 1; 753 struct mbuf *m; 754 u_int k = kring->ring->cur, n = kring->ring->reserved; | 775 u_int const lim = kring->nkr_num_slots - 1; 776 u_int const head = kring->ring->head; 777 u_int n; |
755 struct netmap_adapter *na = kring->na; 756 | 778 struct netmap_adapter *na = kring->na; 779 |
757 /* compute the final position, ring->cur - ring->reserved */ 758 if (n > 0) { 759 if (k < n) 760 k += kring->nkr_num_slots; 761 k += n; 762 } 763 for (n = kring->nr_hwcur; n != k;) { | 780 for (n = kring->nr_hwcur; n != head; n = nm_next(n, lim)) { 781 struct mbuf *m; |
764 struct netmap_slot *slot = &kring->ring->slot[n]; 765 | 782 struct netmap_slot *slot = &kring->ring->slot[n]; 783 |
766 n = nm_next(n, lim); | |
767 if ((slot->flags & NS_FORWARD) == 0 && !force) 768 continue; 769 if (slot->len < 14 || slot->len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { | 784 if ((slot->flags & NS_FORWARD) == 0 && !force) 785 continue; 786 if (slot->len < 14 || slot->len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { |
770 D("bad pkt at %d len %d", n, slot->len); | 787 RD(5, "bad pkt at %d len %d", n, slot->len); |
771 continue; 772 } 773 slot->flags &= ~NS_FORWARD; // XXX needed ? | 788 continue; 789 } 790 slot->flags &= ~NS_FORWARD; // XXX needed ? |
774 /* XXX adapt to the case of a multisegment packet */ | 791 /* XXX TODO: adapt to the case of a multisegment packet */ |
775 m = m_devget(BDG_NMB(na, slot), slot->len, 0, na->ifp, NULL); 776 777 if (m == NULL) 778 break; 779 mbq_enqueue(q, m); 780 } 781} 782 783 784/* | 792 m = m_devget(BDG_NMB(na, slot), slot->len, 0, na->ifp, NULL); 793 794 if (m == NULL) 795 break; 796 mbq_enqueue(q, m); 797 } 798} 799 800 801/* |
785 * The host ring has packets from nr_hwcur to (cur - reserved) 786 * to be sent down to the NIC. 787 * We need to use the queue lock on the source (host RX ring) 788 * to protect against netmap_transmit. 789 * If the user is well behaved we do not need to acquire locks 790 * on the destination(s), 791 * so we only need to make sure that there are no panics because 792 * of user errors. 793 * XXX verify 794 * 795 * We scan the tx rings, which have just been 796 * flushed so nr_hwcur == cur. Pushing packets down means 797 * increment cur and decrement avail. 798 * XXX to be verified | 802 * Send to the NIC rings packets marked NS_FORWARD between 803 * kring->nr_hwcur and kring->rhead 804 * Called under kring->rx_queue.lock on the sw rx ring, |
799 */ | 805 */ |
800static void | 806static u_int |
801netmap_sw_to_nic(struct netmap_adapter *na) 802{ 803 struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings]; | 807netmap_sw_to_nic(struct netmap_adapter *na) 808{ 809 struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings]; |
804 struct netmap_kring *k1 = &na->tx_rings[0]; 805 u_int i, howmany, src_lim, dst_lim; | 810 struct netmap_slot *rxslot = kring->ring->slot; 811 u_int i, rxcur = kring->nr_hwcur; 812 u_int const head = kring->rhead; 813 u_int const src_lim = kring->nkr_num_slots - 1; 814 u_int sent = 0; |
806 | 815 |
807 /* XXX we should also check that the carrier is on */ 808 if (kring->nkr_stopped) 809 return; | 816 /* scan rings to find space, then fill as much as possible */ 817 for (i = 0; i < na->num_tx_rings; i++) { 818 struct netmap_kring *kdst = &na->tx_rings[i]; 819 struct netmap_ring *rdst = kdst->ring; 820 u_int const dst_lim = kdst->nkr_num_slots - 1; |
810 | 821 |
811 mtx_lock(&kring->q_lock); | 822 /* XXX do we trust ring or kring->rcur,rtail ? */ 823 for (; rxcur != head && !nm_ring_empty(rdst); 824 rxcur = nm_next(rxcur, src_lim) ) { 825 struct netmap_slot *src, *dst, tmp; 826 u_int dst_cur = rdst->cur; |
812 | 827 |
813 if (kring->nkr_stopped) 814 goto out; | 828 src = &rxslot[rxcur]; 829 if ((src->flags & NS_FORWARD) == 0 && !netmap_fwd) 830 continue; |
815 | 831 |
816 howmany = kring->nr_hwavail; /* XXX otherwise cur - reserved - nr_hwcur */ | 832 sent++; |
817 | 833 |
818 src_lim = kring->nkr_num_slots - 1; 819 for (i = 0; howmany > 0 && i < na->num_tx_rings; i++, k1++) { 820 ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail); 821 dst_lim = k1->nkr_num_slots - 1; 822 while (howmany > 0 && k1->ring->avail > 0) { 823 struct netmap_slot *src, *dst, tmp; 824 src = &kring->ring->slot[kring->nr_hwcur]; 825 dst = &k1->ring->slot[k1->ring->cur]; | 834 dst = &rdst->slot[dst_cur]; 835 |
826 tmp = *src; | 836 tmp = *src; |
837 |
|
827 src->buf_idx = dst->buf_idx; 828 src->flags = NS_BUF_CHANGED; 829 830 dst->buf_idx = tmp.buf_idx; 831 dst->len = tmp.len; 832 dst->flags = NS_BUF_CHANGED; | 838 src->buf_idx = dst->buf_idx; 839 src->flags = NS_BUF_CHANGED; 840 841 dst->buf_idx = tmp.buf_idx; 842 dst->len = tmp.len; 843 dst->flags = NS_BUF_CHANGED; |
833 ND("out len %d buf %d from %d to %d", 834 dst->len, dst->buf_idx, 835 kring->nr_hwcur, k1->ring->cur); | |
836 | 844 |
837 kring->nr_hwcur = nm_next(kring->nr_hwcur, src_lim); 838 howmany--; 839 kring->nr_hwavail--; 840 k1->ring->cur = nm_next(k1->ring->cur, dst_lim); 841 k1->ring->avail--; | 845 rdst->cur = nm_next(dst_cur, dst_lim); |
842 } | 846 } |
843 kring->ring->cur = kring->nr_hwcur; // XXX 844 k1++; // XXX why? | 847 /* if (sent) XXX txsync ? */ |
845 } | 848 } |
846out: 847 mtx_unlock(&kring->q_lock); | 849 return sent; |
848} 849 850 851/* 852 * netmap_txsync_to_host() passes packets up. We are called from a 853 * system call in user process context, and the only contention 854 * can be among multiple user threads erroneously calling 855 * this routine concurrently. 856 */ 857void 858netmap_txsync_to_host(struct netmap_adapter *na) 859{ 860 struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings]; 861 struct netmap_ring *ring = kring->ring; | 850} 851 852 853/* 854 * netmap_txsync_to_host() passes packets up. We are called from a 855 * system call in user process context, and the only contention 856 * can be among multiple user threads erroneously calling 857 * this routine concurrently. 858 */ 859void 860netmap_txsync_to_host(struct netmap_adapter *na) 861{ 862 struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings]; 863 struct netmap_ring *ring = kring->ring; |
862 u_int k, lim = kring->nkr_num_slots - 1; | 864 u_int const lim = kring->nkr_num_slots - 1; 865 u_int const head = nm_txsync_prologue(kring); |
863 struct mbq q; 864 int error; 865 866 error = nm_kr_tryget(kring); 867 if (error) { 868 if (error == NM_KR_BUSY) 869 D("ring %p busy (user error)", kring); 870 return; 871 } | 866 struct mbq q; 867 int error; 868 869 error = nm_kr_tryget(kring); 870 if (error) { 871 if (error == NM_KR_BUSY) 872 D("ring %p busy (user error)", kring); 873 return; 874 } |
872 k = ring->cur; 873 if (k > lim) { | 875 if (head > lim) { |
874 D("invalid ring index in stack TX kring %p", kring); 875 netmap_ring_reinit(kring); 876 nm_kr_put(kring); 877 return; 878 } 879 | 876 D("invalid ring index in stack TX kring %p", kring); 877 netmap_ring_reinit(kring); 878 nm_kr_put(kring); 879 return; 880 } 881 |
880 /* Take packets from hwcur to cur and pass them up. | 882 /* Take packets from hwcur to head and pass them up. 883 * force head = cur since netmap_grab_packets() stops at head |
881 * In case of no buffers we give up. At the end of the loop, 882 * the queue is drained in all cases. 883 */ 884 mbq_init(&q); | 884 * In case of no buffers we give up. At the end of the loop, 885 * the queue is drained in all cases. 886 */ 887 mbq_init(&q); |
885 netmap_grab_packets(kring, &q, 1); 886 kring->nr_hwcur = k; 887 kring->nr_hwavail = ring->avail = lim; | 888 ring->cur = head; 889 netmap_grab_packets(kring, &q, 1 /* force */); 890 ND("have %d pkts in queue", mbq_len(&q)); 891 kring->nr_hwcur = head; 892 kring->nr_hwtail = head + lim; 893 if (kring->nr_hwtail > lim) 894 kring->nr_hwtail -= lim + 1; 895 nm_txsync_finalize(kring); |
888 889 nm_kr_put(kring); 890 netmap_send_up(na->ifp, &q); 891} 892 893 894/* 895 * rxsync backend for packets coming from the host stack. | 896 897 nm_kr_put(kring); 898 netmap_send_up(na->ifp, &q); 899} 900 901 902/* 903 * rxsync backend for packets coming from the host stack. |
896 * They have been put in the queue by netmap_transmit() so we 897 * need to protect access to the kring using a lock. | 904 * They have been put in kring->rx_queue by netmap_transmit(). 905 * We protect access to the kring using kring->rx_queue.lock |
898 * 899 * This routine also does the selrecord if called from the poll handler 900 * (we know because td != NULL). 901 * 902 * NOTE: on linux, selrecord() is defined as a macro and uses pwait 903 * as an additional hidden argument. | 906 * 907 * This routine also does the selrecord if called from the poll handler 908 * (we know because td != NULL). 909 * 910 * NOTE: on linux, selrecord() is defined as a macro and uses pwait 911 * as an additional hidden argument. |
912 * returns the number of packets delivered to tx queues in 913 * transparent mode, or a negative value if error |
|
904 */ | 914 */ |
905static void | 915int |
906netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait) 907{ 908 struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings]; 909 struct netmap_ring *ring = kring->ring; | 916netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait) 917{ 918 struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings]; 919 struct netmap_ring *ring = kring->ring; |
910 u_int j, n, lim = kring->nkr_num_slots; 911 u_int k = ring->cur, resvd = ring->reserved; | 920 u_int nm_i, n; 921 u_int const lim = kring->nkr_num_slots - 1; 922 u_int const head = nm_rxsync_prologue(kring); 923 int ret = 0; 924 struct mbq *q = &kring->rx_queue; |
912 913 (void)pwait; /* disable unused warnings */ 914 | 925 926 (void)pwait; /* disable unused warnings */ 927 |
928 if (head > lim) { 929 netmap_ring_reinit(kring); 930 return EINVAL; 931 } 932 |
|
915 if (kring->nkr_stopped) /* check a first time without lock */ | 933 if (kring->nkr_stopped) /* check a first time without lock */ |
916 return; | 934 return EBUSY; |
917 | 935 |
918 mtx_lock(&kring->q_lock); | 936 mtx_lock(&q->lock); |
919 | 937 |
920 if (kring->nkr_stopped) /* check again with lock held */ | 938 if (kring->nkr_stopped) { /* check again with lock held */ 939 ret = EBUSY; |
921 goto unlock_out; | 940 goto unlock_out; |
922 923 if (k >= lim) { 924 netmap_ring_reinit(kring); 925 goto unlock_out; | |
926 } | 941 } |
927 /* new packets are already set in nr_hwavail */ 928 /* skip past packets that userspace has released */ 929 j = kring->nr_hwcur; 930 if (resvd > 0) { 931 if (resvd + ring->avail >= lim + 1) { 932 D("XXX invalid reserve/avail %d %d", resvd, ring->avail); 933 ring->reserved = resvd = 0; // XXX panic... | 942 943 /* First part: import newly received packets */ 944 n = mbq_len(q); 945 if (n) { /* grab packets from the queue */ 946 struct mbuf *m; 947 uint32_t stop_i; 948 949 nm_i = kring->nr_hwtail; 950 stop_i = nm_prev(nm_i, lim); 951 while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) { 952 int len = MBUF_LEN(m); 953 struct netmap_slot *slot = &ring->slot[nm_i]; 954 955 m_copydata(m, 0, len, BDG_NMB(na, slot)); 956 ND("nm %d len %d", nm_i, len); 957 if (netmap_verbose) 958 D("%s", nm_dump_buf(BDG_NMB(na, slot),len, 128, NULL)); 959 960 slot->len = len; 961 slot->flags = kring->nkr_slot_flags; 962 nm_i = nm_next(nm_i, lim); |
934 } | 963 } |
935 k = (k >= resvd) ? k - resvd : k + lim - resvd; | 964 kring->nr_hwtail = nm_i; |
936 } | 965 } |
937 if (j != k) { 938 n = k >= j ? k - j : k + lim - j; 939 kring->nr_hwavail -= n; 940 kring->nr_hwcur = k; | 966 967 /* 968 * Second part: skip past packets that userspace has released. 969 */ 970 nm_i = kring->nr_hwcur; 971 if (nm_i != head) { /* something was released */ 972 if (netmap_fwd || kring->ring->flags & NR_FORWARD) 973 ret = netmap_sw_to_nic(na); 974 kring->nr_hwcur = head; |
941 } | 975 } |
942 k = ring->avail = kring->nr_hwavail - resvd; 943 if (k == 0 && td) | 976 977 nm_rxsync_finalize(kring); 978 979 /* access copies of cur,tail in the kring */ 980 if (kring->rcur == kring->rtail && td) /* no bufs available */ |
944 selrecord(td, &kring->si); | 981 selrecord(td, &kring->si); |
945 if (k && (netmap_verbose & NM_VERB_HOST)) 946 D("%d pkts from stack", k); | 982 |
947unlock_out: 948 | 983unlock_out: 984 |
949 mtx_unlock(&kring->q_lock); | 985 mtx_unlock(&q->lock); 986 return ret; |
950} 951 952 953/* Get a netmap adapter for the port. 954 * 955 * If it is possible to satisfy the request, return 0 956 * with *na containing the netmap adapter found. 957 * Otherwise return an error code, with *na containing NULL. --- 79 unchanged lines hidden (view full) --- 1037 *na = NA(ifp); 1038 gna = (struct netmap_generic_adapter*)NA(ifp); 1039 gna->prev = prev_na; /* save old na */ 1040 if (prev_na != NULL) { 1041 ifunit_ref(ifp->if_xname); 1042 // XXX add a refcount ? 1043 netmap_adapter_get(prev_na); 1044 } | 987} 988 989 990/* Get a netmap adapter for the port. 991 * 992 * If it is possible to satisfy the request, return 0 993 * with *na containing the netmap adapter found. 994 * Otherwise return an error code, with *na containing NULL. --- 79 unchanged lines hidden (view full) --- 1074 *na = NA(ifp); 1075 gna = (struct netmap_generic_adapter*)NA(ifp); 1076 gna->prev = prev_na; /* save old na */ 1077 if (prev_na != NULL) { 1078 ifunit_ref(ifp->if_xname); 1079 // XXX add a refcount ? 1080 netmap_adapter_get(prev_na); 1081 } |
1045 D("Created generic NA %p (prev %p)", gna, gna->prev); | 1082 ND("Created generic NA %p (prev %p)", gna, gna->prev); |
1046 1047 return 0; 1048} 1049 1050 1051/* 1052 * MUST BE CALLED UNDER NMG_LOCK() 1053 * --- 54 unchanged lines hidden (view full) --- 1108 1109 return error; 1110} 1111 1112 1113/* 1114 * validate parameters on entry for *_txsync() 1115 * Returns ring->cur if ok, or something >= kring->nkr_num_slots | 1083 1084 return 0; 1085} 1086 1087 1088/* 1089 * MUST BE CALLED UNDER NMG_LOCK() 1090 * --- 54 unchanged lines hidden (view full) --- 1145 1146 return error; 1147} 1148 1149 1150/* 1151 * validate parameters on entry for *_txsync() 1152 * Returns ring->cur if ok, or something >= kring->nkr_num_slots |
1116 * in case of error. The extra argument is a pointer to 1117 * 'new_bufs'. XXX this may be deprecated at some point. | 1153 * in case of error. |
1118 * | 1154 * |
1119 * Below is a correct configuration on input. ring->cur 1120 * must be in the region covered by kring->hwavail, 1121 * and ring->avail and kring->avail should end at the same slot. | 1155 * rhead, rcur and rtail=hwtail are stored from previous round. 1156 * hwcur is the next packet to send to the ring. |
1122 * | 1157 * |
1123 * +-hwcur 1124 * | 1125 * v<--hwres-->|<-----hwavail----> 1126 * ------+------------------------------+-------- ring 1127 * | 1128 * |<---avail---> 1129 * +--cur | 1158 * We want 1159 * hwcur <= *rhead <= head <= cur <= tail = *rtail <= hwtail |
1130 * | 1160 * |
1161 * hwcur, rhead, rtail and hwtail are reliable |
|
1131 */ 1132u_int | 1162 */ 1163u_int |
1133nm_txsync_prologue(struct netmap_kring *kring, u_int *new_slots) | 1164nm_txsync_prologue(struct netmap_kring *kring) |
1134{ 1135 struct netmap_ring *ring = kring->ring; | 1165{ 1166 struct netmap_ring *ring = kring->ring; |
1167 u_int head = ring->head; /* read only once */ |
|
1136 u_int cur = ring->cur; /* read only once */ | 1168 u_int cur = ring->cur; /* read only once */ |
1137 u_int avail = ring->avail; /* read only once */ | |
1138 u_int n = kring->nkr_num_slots; | 1169 u_int n = kring->nkr_num_slots; |
1139 u_int kstart, kend, a; | |
1140 | 1170 |
1141#if 1 /* kernel sanity checks */ 1142 if (kring->nr_hwcur >= n || 1143 kring->nr_hwreserved >= n || kring->nr_hwavail >= n || 1144 kring->nr_hwreserved + kring->nr_hwavail >= n) | 1171 ND(5, "%s kcur %d ktail %d head %d cur %d tail %d", 1172 kring->name, 1173 kring->nr_hwcur, kring->nr_hwtail, 1174 ring->head, ring->cur, ring->tail); 1175#if 1 /* kernel sanity checks; but we can trust the kring. */ 1176 if (kring->nr_hwcur >= n || kring->rhead >= n || 1177 kring->rtail >= n || kring->nr_hwtail >= n) |
1145 goto error; 1146#endif /* kernel sanity checks */ | 1178 goto error; 1179#endif /* kernel sanity checks */ |
1147 kstart = kring->nr_hwcur + kring->nr_hwreserved; 1148 if (kstart >= n) 1149 kstart -= n; 1150 kend = kstart + kring->nr_hwavail; 1151 /* user sanity checks. a is the expected avail */ 1152 if (cur < kstart) { 1153 /* too low, but maybe wraparound */ 1154 if (cur + n > kend) | 1180 /* 1181 * user sanity checks. We only use 'cur', 1182 * A, B, ... are possible positions for cur: 1183 * 1184 * 0 A cur B tail C n-1 1185 * 0 D tail E cur F n-1 1186 * 1187 * B, F, D are valid. A, C, E are wrong 1188 */ 1189 if (kring->rtail >= kring->rhead) { 1190 /* want rhead <= head <= rtail */ 1191 if (head < kring->rhead || head > kring->rtail) |
1155 goto error; | 1192 goto error; |
1156 *new_slots = cur + n - kstart; 1157 a = kend - cur - n; 1158 } else { 1159 if (cur > kend) | 1193 /* and also head <= cur <= rtail */ 1194 if (cur < head || cur > kring->rtail) |
1160 goto error; | 1195 goto error; |
1161 *new_slots = cur - kstart; 1162 a = kend - cur; | 1196 } else { /* here rtail < rhead */ 1197 /* we need head outside rtail .. rhead */ 1198 if (head > kring->rtail && head < kring->rhead) 1199 goto error; 1200 1201 /* two cases now: head <= rtail or head >= rhead */ 1202 if (head <= kring->rtail) { 1203 /* want head <= cur <= rtail */ 1204 if (cur < head || cur > kring->rtail) 1205 goto error; 1206 } else { /* head >= rhead */ 1207 /* cur must be outside rtail..head */ 1208 if (cur > kring->rtail && cur < head) 1209 goto error; 1210 } |
1163 } | 1211 } |
1164 if (a != avail) { 1165 RD(5, "wrong but fixable avail have %d need %d", 1166 avail, a); 1167 ring->avail = avail = a; | 1212 if (ring->tail != kring->rtail) { 1213 RD(5, "tail overwritten was %d need %d", 1214 ring->tail, kring->rtail); 1215 ring->tail = kring->rtail; |
1168 } | 1216 } |
1169 return cur; | 1217 kring->rhead = head; 1218 kring->rcur = cur; 1219 return head; |
1170 1171error: | 1220 1221error: |
1172 RD(5, "kring error: hwcur %d hwres %d hwavail %d cur %d av %d", | 1222 RD(5, "%s kring error: hwcur %d rcur %d hwtail %d cur %d tail %d", 1223 kring->name, |
1173 kring->nr_hwcur, | 1224 kring->nr_hwcur, |
1174 kring->nr_hwreserved, kring->nr_hwavail, 1175 cur, avail); | 1225 kring->rcur, kring->nr_hwtail, 1226 cur, ring->tail); |
1176 return n; 1177} 1178 1179 1180/* 1181 * validate parameters on entry for *_rxsync() | 1227 return n; 1228} 1229 1230 1231/* 1232 * validate parameters on entry for *_rxsync() |
1182 * Returns ring->cur - ring->reserved if ok, 1183 * or something >= kring->nkr_num_slots 1184 * in case of error. The extra argument is a pointer to 1185 * 'resvd'. XXX this may be deprecated at some point. | 1233 * Returns ring->head if ok, kring->nkr_num_slots on error. |
1186 * | 1234 * |
1187 * Below is a correct configuration on input. ring->cur and 1188 * ring->reserved must be in the region covered by kring->hwavail, 1189 * and ring->avail and kring->avail should end at the same slot. | 1235 * For a valid configuration, 1236 * hwcur <= head <= cur <= tail <= hwtail |
1190 * | 1237 * |
1191 * +-hwcur 1192 * | 1193 * v<-------hwavail----------> 1194 * ---------+--------------------------+-------- ring 1195 * |<--res-->| 1196 * |<---avail---> 1197 * +--cur | 1238 * We only consider head and cur. 1239 * hwcur and hwtail are reliable. |
1198 * 1199 */ 1200u_int | 1240 * 1241 */ 1242u_int |
1201nm_rxsync_prologue(struct netmap_kring *kring, u_int *resvd) | 1243nm_rxsync_prologue(struct netmap_kring *kring) |
1202{ 1203 struct netmap_ring *ring = kring->ring; | 1244{ 1245 struct netmap_ring *ring = kring->ring; |
1204 u_int cur = ring->cur; /* read only once */ 1205 u_int avail = ring->avail; /* read only once */ 1206 u_int res = ring->reserved; /* read only once */ 1207 u_int n = kring->nkr_num_slots; 1208 u_int kend = kring->nr_hwcur + kring->nr_hwavail; 1209 u_int a; | 1246 uint32_t const n = kring->nkr_num_slots; 1247 uint32_t head, cur; |
1210 | 1248 |
1249 ND("%s kc %d kt %d h %d c %d t %d", 1250 kring->name, 1251 kring->nr_hwcur, kring->nr_hwtail, 1252 ring->head, ring->cur, ring->tail); 1253 /* 1254 * Before storing the new values, we should check they do not 1255 * move backwards. However: 1256 * - head is not an issue because the previous value is hwcur; 1257 * - cur could in principle go back, however it does not matter 1258 * because we are processing a brand new rxsync() 1259 */ 1260 cur = kring->rcur = ring->cur; /* read only once */ 1261 head = kring->rhead = ring->head; /* read only once */ |
|
1211#if 1 /* kernel sanity checks */ | 1262#if 1 /* kernel sanity checks */ |
1212 if (kring->nr_hwcur >= n || kring->nr_hwavail >= n) | 1263 if (kring->nr_hwcur >= n || kring->nr_hwtail >= n) |
1213 goto error; 1214#endif /* kernel sanity checks */ 1215 /* user sanity checks */ | 1264 goto error; 1265#endif /* kernel sanity checks */ 1266 /* user sanity checks */ |
1216 if (res >= n) 1217 goto error; 1218 /* check that cur is valid, a is the expected value of avail */ 1219 if (cur < kring->nr_hwcur) { 1220 /* too low, but maybe wraparound */ 1221 if (cur + n > kend) | 1267 if (kring->nr_hwtail >= kring->nr_hwcur) { 1268 /* want hwcur <= rhead <= hwtail */ 1269 if (head < kring->nr_hwcur || head > kring->nr_hwtail) |
1222 goto error; | 1270 goto error; |
1223 a = kend - (cur + n); 1224 } else { 1225 if (cur > kend) | 1271 /* and also rhead <= rcur <= hwtail */ 1272 if (cur < head || cur > kring->nr_hwtail) |
1226 goto error; | 1273 goto error; |
1227 a = kend - cur; 1228 } 1229 if (a != avail) { 1230 RD(5, "wrong but fixable avail have %d need %d", 1231 avail, a); 1232 ring->avail = avail = a; 1233 } 1234 if (res != 0) { 1235 /* then repeat the check for cur + res */ 1236 cur = (cur >= res) ? cur - res : n + cur - res; 1237 if (cur < kring->nr_hwcur) { 1238 /* too low, but maybe wraparound */ 1239 if (cur + n > kend) 1240 goto error; 1241 } else if (cur > kend) { | 1274 } else { 1275 /* we need rhead outside hwtail..hwcur */ 1276 if (head < kring->nr_hwcur && head > kring->nr_hwtail) |
1242 goto error; | 1277 goto error; |
1278 /* two cases now: head <= hwtail or head >= hwcur */ 1279 if (head <= kring->nr_hwtail) { 1280 /* want head <= cur <= hwtail */ 1281 if (cur < head || cur > kring->nr_hwtail) 1282 goto error; 1283 } else { 1284 /* cur must be outside hwtail..head */ 1285 if (cur < head && cur > kring->nr_hwtail) 1286 goto error; |
|
1243 } 1244 } | 1287 } 1288 } |
1245 *resvd = res; 1246 return cur; | 1289 if (ring->tail != kring->rtail) { 1290 RD(5, "%s tail overwritten was %d need %d", 1291 kring->name, 1292 ring->tail, kring->rtail); 1293 ring->tail = kring->rtail; 1294 } 1295 return head; |
1247 1248error: | 1296 1297error: |
1249 RD(5, "kring error: hwcur %d hwres %d hwavail %d cur %d av %d res %d", | 1298 RD(5, "kring error: hwcur %d rcur %d hwtail %d head %d cur %d tail %d", |
1250 kring->nr_hwcur, | 1299 kring->nr_hwcur, |
1251 kring->nr_hwreserved, kring->nr_hwavail, 1252 ring->cur, avail, res); | 1300 kring->rcur, kring->nr_hwtail, 1301 kring->rhead, kring->rcur, ring->tail); |
1253 return n; 1254} 1255 | 1302 return n; 1303} 1304 |
1305 |
|
1256/* 1257 * Error routine called when txsync/rxsync detects an error. | 1306/* 1307 * Error routine called when txsync/rxsync detects an error. |
1258 * Can't do much more than resetting cur = hwcur, avail = hwavail. | 1308 * Can't do much more than resetting head =cur = hwcur, tail = hwtail |
1259 * Return 1 on reinit. 1260 * 1261 * This routine is only called by the upper half of the kernel. 1262 * It only reads hwcur (which is changed only by the upper half, too) | 1309 * Return 1 on reinit. 1310 * 1311 * This routine is only called by the upper half of the kernel. 1312 * It only reads hwcur (which is changed only by the upper half, too) |
1263 * and hwavail (which may be changed by the lower half, but only on | 1313 * and hwtail (which may be changed by the lower half, but only on |
1264 * a tx ring and only to increase it, so any error will be recovered 1265 * on the next call). For the above, we don't strictly need to call 1266 * it under lock. 1267 */ 1268int 1269netmap_ring_reinit(struct netmap_kring *kring) 1270{ 1271 struct netmap_ring *ring = kring->ring; 1272 u_int i, lim = kring->nkr_num_slots - 1; 1273 int errors = 0; 1274 1275 // XXX KASSERT nm_kr_tryget 1276 RD(10, "called for %s", NM_IFPNAME(kring->na->ifp)); | 1314 * a tx ring and only to increase it, so any error will be recovered 1315 * on the next call). For the above, we don't strictly need to call 1316 * it under lock. 1317 */ 1318int 1319netmap_ring_reinit(struct netmap_kring *kring) 1320{ 1321 struct netmap_ring *ring = kring->ring; 1322 u_int i, lim = kring->nkr_num_slots - 1; 1323 int errors = 0; 1324 1325 // XXX KASSERT nm_kr_tryget 1326 RD(10, "called for %s", NM_IFPNAME(kring->na->ifp)); |
1327 // XXX probably wrong to trust userspace 1328 kring->rhead = ring->head; 1329 kring->rcur = ring->cur; 1330 kring->rtail = ring->tail; 1331 |
|
1277 if (ring->cur > lim) 1278 errors++; | 1332 if (ring->cur > lim) 1333 errors++; |
1334 if (ring->head > lim) 1335 errors++; 1336 if (ring->tail > lim) 1337 errors++; |
|
1279 for (i = 0; i <= lim; i++) { 1280 u_int idx = ring->slot[i].buf_idx; 1281 u_int len = ring->slot[i].len; 1282 if (idx < 2 || idx >= netmap_total_buffers) { | 1338 for (i = 0; i <= lim; i++) { 1339 u_int idx = ring->slot[i].buf_idx; 1340 u_int len = ring->slot[i].len; 1341 if (idx < 2 || idx >= netmap_total_buffers) { |
1283 if (!errors++) 1284 D("bad buffer at slot %d idx %d len %d ", i, idx, len); | 1342 RD(5, "bad index at slot %d idx %d len %d ", i, idx, len); |
1285 ring->slot[i].buf_idx = 0; 1286 ring->slot[i].len = 0; 1287 } else if (len > NETMAP_BDG_BUF_SIZE(kring->na->nm_mem)) { 1288 ring->slot[i].len = 0; | 1343 ring->slot[i].buf_idx = 0; 1344 ring->slot[i].len = 0; 1345 } else if (len > NETMAP_BDG_BUF_SIZE(kring->na->nm_mem)) { 1346 ring->slot[i].len = 0; |
1289 if (!errors++) 1290 D("bad len %d at slot %d idx %d", 1291 len, i, idx); | 1347 RD(5, "bad len at slot %d idx %d len %d", i, idx, len); |
1292 } 1293 } 1294 if (errors) { | 1348 } 1349 } 1350 if (errors) { |
1295 int pos = kring - kring->na->tx_rings; 1296 int n = kring->na->num_tx_rings + 1; 1297 | |
1298 RD(10, "total %d errors", errors); | 1351 RD(10, "total %d errors", errors); |
1299 errors++; 1300 RD(10, "%s %s[%d] reinit, cur %d -> %d avail %d -> %d", 1301 NM_IFPNAME(kring->na->ifp), 1302 pos < n ? "TX" : "RX", pos < n ? pos : pos - n, | 1352 RD(10, "%s reinit, cur %d -> %d tail %d -> %d", 1353 kring->name, |
1303 ring->cur, kring->nr_hwcur, | 1354 ring->cur, kring->nr_hwcur, |
1304 ring->avail, kring->nr_hwavail); 1305 ring->cur = kring->nr_hwcur; 1306 ring->avail = kring->nr_hwavail; | 1355 ring->tail, kring->nr_hwtail); 1356 ring->head = kring->rhead = kring->nr_hwcur; 1357 ring->cur = kring->rcur = kring->nr_hwcur; 1358 ring->tail = kring->rtail = kring->nr_hwtail; |
1307 } 1308 return (errors ? 1 : 0); 1309} 1310 1311 1312/* 1313 * Set the ring ID. For devices with a single queue, a request 1314 * for all rings is the same as a single ring. --- 116 unchanged lines hidden (view full) --- 1431 1432/* 1433 * ioctl(2) support for the "netmap" device. 1434 * 1435 * Following a list of accepted commands: 1436 * - NIOCGINFO 1437 * - SIOCGIFADDR just for convenience 1438 * - NIOCREGIF | 1359 } 1360 return (errors ? 1 : 0); 1361} 1362 1363 1364/* 1365 * Set the ring ID. For devices with a single queue, a request 1366 * for all rings is the same as a single ring. --- 116 unchanged lines hidden (view full) --- 1483 1484/* 1485 * ioctl(2) support for the "netmap" device. 1486 * 1487 * Following a list of accepted commands: 1488 * - NIOCGINFO 1489 * - SIOCGIFADDR just for convenience 1490 * - NIOCREGIF |
1439 * - NIOCUNREGIF | |
1440 * - NIOCTXSYNC 1441 * - NIOCRXSYNC 1442 * 1443 * Return 0 on success, errno otherwise. 1444 */ 1445int 1446netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, 1447 int fflag, struct thread *td) --- 19 unchanged lines hidden (view full) --- 1467 ({ ((struct file *)td)->private_data = p; (p ? 0 : EINVAL); }) 1468 1469 1470#define devfs_clear_cdevpriv() do { \ 1471 netmap_dtor(priv); ((struct file *)td)->private_data = 0; \ 1472 } while (0) 1473#endif /* linux */ 1474 | 1491 * - NIOCTXSYNC 1492 * - NIOCRXSYNC 1493 * 1494 * Return 0 on success, errno otherwise. 1495 */ 1496int 1497netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, 1498 int fflag, struct thread *td) --- 19 unchanged lines hidden (view full) --- 1518 ({ ((struct file *)td)->private_data = p; (p ? 0 : EINVAL); }) 1519 1520 1521#define devfs_clear_cdevpriv() do { \ 1522 netmap_dtor(priv); ((struct file *)td)->private_data = 0; \ 1523 } while (0) 1524#endif /* linux */ 1525 |
1526 if (cmd == NIOCGINFO || cmd == NIOCREGIF) { 1527 /* truncate name */ 1528 nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0'; 1529 if (nmr->nr_version != NETMAP_API) { 1530 D("API mismatch for %s got %d need %d", 1531 nmr->nr_name, 1532 nmr->nr_version, NETMAP_API); 1533 nmr->nr_version = NETMAP_API; 1534 return EINVAL; 1535 } 1536 } |
|
1475 CURVNET_SET(TD_TO_VNET(td)); 1476 1477 error = devfs_get_cdevpriv((void **)&priv); 1478 if (error) { 1479 CURVNET_RESTORE(); 1480 /* XXX ENOENT should be impossible, since the priv 1481 * is now created in the open */ 1482 return (error == ENOENT ? ENXIO : error); 1483 } 1484 | 1537 CURVNET_SET(TD_TO_VNET(td)); 1538 1539 error = devfs_get_cdevpriv((void **)&priv); 1540 if (error) { 1541 CURVNET_RESTORE(); 1542 /* XXX ENOENT should be impossible, since the priv 1543 * is now created in the open */ 1544 return (error == ENOENT ? ENXIO : error); 1545 } 1546 |
1485 nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0'; /* truncate name */ | |
1486 switch (cmd) { 1487 case NIOCGINFO: /* return capabilities etc */ | 1547 switch (cmd) { 1548 case NIOCGINFO: /* return capabilities etc */ |
1488 if (nmr->nr_version != NETMAP_API) { 1489 D("API mismatch got %d have %d", 1490 nmr->nr_version, NETMAP_API); 1491 nmr->nr_version = NETMAP_API; 1492 error = EINVAL; 1493 break; 1494 } | |
1495 if (nmr->nr_cmd == NETMAP_BDG_LIST) { 1496 error = netmap_bdg_ctl(nmr, NULL); 1497 break; 1498 } 1499 1500 NMG_LOCK(); 1501 do { 1502 /* memsize is always valid */ --- 23 unchanged lines hidden (view full) --- 1526 if (memflags & NETMAP_MEM_PRIVATE) 1527 nmr->nr_ringid |= NETMAP_PRIV_MEM; 1528 netmap_adapter_put(na); 1529 } while (0); 1530 NMG_UNLOCK(); 1531 break; 1532 1533 case NIOCREGIF: | 1549 if (nmr->nr_cmd == NETMAP_BDG_LIST) { 1550 error = netmap_bdg_ctl(nmr, NULL); 1551 break; 1552 } 1553 1554 NMG_LOCK(); 1555 do { 1556 /* memsize is always valid */ --- 23 unchanged lines hidden (view full) --- 1580 if (memflags & NETMAP_MEM_PRIVATE) 1581 nmr->nr_ringid |= NETMAP_PRIV_MEM; 1582 netmap_adapter_put(na); 1583 } while (0); 1584 NMG_UNLOCK(); 1585 break; 1586 1587 case NIOCREGIF: |
1534 if (nmr->nr_version != NETMAP_API) { 1535 nmr->nr_version = NETMAP_API; 1536 error = EINVAL; 1537 break; 1538 } | |
1539 /* possibly attach/detach NIC and VALE switch */ 1540 i = nmr->nr_cmd; 1541 if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH 1542 || i == NETMAP_BDG_OFFSET) { 1543 error = netmap_bdg_ctl(nmr, NULL); 1544 break; 1545 } else if (i != 0) { 1546 D("nr_cmd must be 0 not %d", i); --- 41 unchanged lines hidden (view full) --- 1588 nmr->nr_ringid |= NETMAP_PRIV_MEM; 1589 *(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM; 1590 } 1591 nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp); 1592 } while (0); 1593 NMG_UNLOCK(); 1594 break; 1595 | 1588 /* possibly attach/detach NIC and VALE switch */ 1589 i = nmr->nr_cmd; 1590 if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH 1591 || i == NETMAP_BDG_OFFSET) { 1592 error = netmap_bdg_ctl(nmr, NULL); 1593 break; 1594 } else if (i != 0) { 1595 D("nr_cmd must be 0 not %d", i); --- 41 unchanged lines hidden (view full) --- 1637 nmr->nr_ringid |= NETMAP_PRIV_MEM; 1638 *(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM; 1639 } 1640 nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp); 1641 } while (0); 1642 NMG_UNLOCK(); 1643 break; 1644 |
1596 case NIOCUNREGIF: 1597 // XXX we have no data here ? 1598 D("deprecated, data is %p", nmr); 1599 error = EINVAL; 1600 break; 1601 | |
1602 case NIOCTXSYNC: 1603 case NIOCRXSYNC: 1604 nifp = priv->np_nifp; 1605 1606 if (nifp == NULL) { 1607 error = ENXIO; 1608 break; 1609 } --- 34 unchanged lines hidden (view full) --- 1644 error = EBUSY; 1645 goto out; 1646 } 1647 if (cmd == NIOCTXSYNC) { 1648 if (netmap_verbose & NM_VERB_TXSYNC) 1649 D("pre txsync ring %d cur %d hwcur %d", 1650 i, kring->ring->cur, 1651 kring->nr_hwcur); | 1645 case NIOCTXSYNC: 1646 case NIOCRXSYNC: 1647 nifp = priv->np_nifp; 1648 1649 if (nifp == NULL) { 1650 error = ENXIO; 1651 break; 1652 } --- 34 unchanged lines hidden (view full) --- 1687 error = EBUSY; 1688 goto out; 1689 } 1690 if (cmd == NIOCTXSYNC) { 1691 if (netmap_verbose & NM_VERB_TXSYNC) 1692 D("pre txsync ring %d cur %d hwcur %d", 1693 i, kring->ring->cur, 1694 kring->nr_hwcur); |
1652 na->nm_txsync(na, i, NAF_FORCE_RECLAIM); | 1695 if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) { 1696 netmap_ring_reinit(kring); 1697 } else { 1698 na->nm_txsync(na, i, NAF_FORCE_RECLAIM); 1699 } |
1653 if (netmap_verbose & NM_VERB_TXSYNC) 1654 D("post txsync ring %d cur %d hwcur %d", 1655 i, kring->ring->cur, 1656 kring->nr_hwcur); 1657 } else { 1658 na->nm_rxsync(na, i, NAF_FORCE_READ); 1659 microtime(&na->rx_rings[i].ring->ts); 1660 } --- 60 unchanged lines hidden (view full) --- 1721int 1722netmap_poll(struct cdev *dev, int events, struct thread *td) 1723{ 1724 struct netmap_priv_d *priv = NULL; 1725 struct netmap_adapter *na; 1726 struct ifnet *ifp; 1727 struct netmap_kring *kring; 1728 u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0; | 1700 if (netmap_verbose & NM_VERB_TXSYNC) 1701 D("post txsync ring %d cur %d hwcur %d", 1702 i, kring->ring->cur, 1703 kring->nr_hwcur); 1704 } else { 1705 na->nm_rxsync(na, i, NAF_FORCE_READ); 1706 microtime(&na->rx_rings[i].ring->ts); 1707 } --- 60 unchanged lines hidden (view full) --- 1768int 1769netmap_poll(struct cdev *dev, int events, struct thread *td) 1770{ 1771 struct netmap_priv_d *priv = NULL; 1772 struct netmap_adapter *na; 1773 struct ifnet *ifp; 1774 struct netmap_kring *kring; 1775 u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0; |
1729 u_int lim_tx, lim_rx, host_forwarded = 0; 1730 struct mbq q; | 1776 u_int lim_tx, lim_rx; 1777 struct mbq q; /* packets from hw queues to host stack */ |
1731 void *pwait = dev; /* linux compatibility */ 1732 1733 /* 1734 * In order to avoid nested locks, we need to "double check" 1735 * txsync and rxsync if we decide to do a selrecord(). 1736 * retry_tx (and retry_rx, later) prevent looping forever. 1737 */ | 1778 void *pwait = dev; /* linux compatibility */ 1779 1780 /* 1781 * In order to avoid nested locks, we need to "double check" 1782 * txsync and rxsync if we decide to do a selrecord(). 1783 * retry_tx (and retry_rx, later) prevent looping forever. 1784 */ |
1738 int retry_tx = 1; | 1785 int retry_tx = 1, retry_rx = 1; |
1739 1740 (void)pwait; 1741 mbq_init(&q); 1742 1743 if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL) 1744 return POLLERR; 1745 1746 if (priv->np_nifp == NULL) { --- 17 unchanged lines hidden (view full) --- 1764 D("device %s events 0x%x", NM_IFPNAME(ifp), events); 1765 want_tx = events & (POLLOUT | POLLWRNORM); 1766 want_rx = events & (POLLIN | POLLRDNORM); 1767 1768 lim_tx = na->num_tx_rings; 1769 lim_rx = na->num_rx_rings; 1770 1771 if (priv->np_qfirst == NETMAP_SW_RING) { | 1786 1787 (void)pwait; 1788 mbq_init(&q); 1789 1790 if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL) 1791 return POLLERR; 1792 1793 if (priv->np_nifp == NULL) { --- 17 unchanged lines hidden (view full) --- 1811 D("device %s events 0x%x", NM_IFPNAME(ifp), events); 1812 want_tx = events & (POLLOUT | POLLWRNORM); 1813 want_rx = events & (POLLIN | POLLRDNORM); 1814 1815 lim_tx = na->num_tx_rings; 1816 lim_rx = na->num_rx_rings; 1817 1818 if (priv->np_qfirst == NETMAP_SW_RING) { |
1819 // XXX locking ? |
|
1772 /* handle the host stack ring */ 1773 if (priv->np_txpoll || want_tx) { 1774 /* push any packets up, then we are always ready */ 1775 netmap_txsync_to_host(na); 1776 revents |= want_tx; 1777 } 1778 if (want_rx) { 1779 kring = &na->rx_rings[lim_rx]; | 1820 /* handle the host stack ring */ 1821 if (priv->np_txpoll || want_tx) { 1822 /* push any packets up, then we are always ready */ 1823 netmap_txsync_to_host(na); 1824 revents |= want_tx; 1825 } 1826 if (want_rx) { 1827 kring = &na->rx_rings[lim_rx]; |
1780 if (kring->ring->avail == 0) | 1828 /* XXX replace with rxprologue etc. */ 1829 if (nm_ring_empty(kring->ring)) |
1781 netmap_rxsync_from_host(na, td, dev); | 1830 netmap_rxsync_from_host(na, td, dev); |
1782 if (kring->ring->avail > 0) { | 1831 if (!nm_ring_empty(kring->ring)) |
1783 revents |= want_rx; | 1832 revents |= want_rx; |
1784 } | |
1785 } 1786 return (revents); 1787 } 1788 | 1833 } 1834 return (revents); 1835 } 1836 |
1789 /* 1790 * If we are in transparent mode, check also the host rx ring 1791 * XXX Transparent mode at the moment requires to bind all 1792 * rings to a single file descriptor. 1793 */ 1794 kring = &na->rx_rings[lim_rx]; 1795 if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all 1796 && want_rx 1797 && (netmap_fwd || kring->ring->flags & NR_FORWARD) ) { 1798 if (kring->ring->avail == 0) 1799 netmap_rxsync_from_host(na, td, dev); 1800 if (kring->ring->avail > 0) 1801 revents |= want_rx; 1802 } | |
1803 1804 /* 1805 * check_all_{tx|rx} are set if the card has more than one queue AND 1806 * the file descriptor is bound to all of them. If so, we sleep on 1807 * the "global" selinfo, otherwise we sleep on individual selinfo 1808 * (FreeBSD only allows two selinfo's per file descriptor). 1809 * The interrupt routine in the driver wake one or the other 1810 * (or both) depending on which clients are active. --- 9 unchanged lines hidden (view full) --- 1820 if (priv->np_qlast != NETMAP_HW_RING) { 1821 lim_tx = lim_rx = priv->np_qlast; 1822 } 1823 1824 /* 1825 * We start with a lock free round which is cheap if we have 1826 * slots available. If this fails, then lock and call the sync 1827 * routines. | 1837 1838 /* 1839 * check_all_{tx|rx} are set if the card has more than one queue AND 1840 * the file descriptor is bound to all of them. If so, we sleep on 1841 * the "global" selinfo, otherwise we sleep on individual selinfo 1842 * (FreeBSD only allows two selinfo's per file descriptor). 1843 * The interrupt routine in the driver wake one or the other 1844 * (or both) depending on which clients are active. --- 9 unchanged lines hidden (view full) --- 1854 if (priv->np_qlast != NETMAP_HW_RING) { 1855 lim_tx = lim_rx = priv->np_qlast; 1856 } 1857 1858 /* 1859 * We start with a lock free round which is cheap if we have 1860 * slots available. If this fails, then lock and call the sync 1861 * routines. |
1828 * XXX rather than ring->avail >0 should check that 1829 * ring->cur has not reached hwcur+hwavail | |
1830 */ 1831 for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) { 1832 kring = &na->rx_rings[i]; | 1862 */ 1863 for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) { 1864 kring = &na->rx_rings[i]; |
1833 if (kring->ring->avail > 0) { | 1865 /* XXX compare ring->cur and kring->tail */ 1866 if (!nm_ring_empty(kring->ring)) { |
1834 revents |= want_rx; 1835 want_rx = 0; /* also breaks the loop */ 1836 } 1837 } 1838 for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) { 1839 kring = &na->tx_rings[i]; | 1867 revents |= want_rx; 1868 want_rx = 0; /* also breaks the loop */ 1869 } 1870 } 1871 for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) { 1872 kring = &na->tx_rings[i]; |
1840 if (kring->ring->avail > 0) { | 1873 /* XXX compare ring->cur and kring->tail */ 1874 if (!nm_ring_empty(kring->ring)) { |
1841 revents |= want_tx; 1842 want_tx = 0; /* also breaks the loop */ 1843 } 1844 } 1845 1846 /* | 1875 revents |= want_tx; 1876 want_tx = 0; /* also breaks the loop */ 1877 } 1878 } 1879 1880 /* |
1847 * If we to push packets out (priv->np_txpoll) or want_tx is 1848 * still set, we do need to run the txsync calls (on all rings, 1849 * to avoid that the tx rings stall). | 1881 * If we want to push packets out (priv->np_txpoll) or 1882 * want_tx is still set, we must issue txsync calls 1883 * (on all rings, to avoid that the tx rings stall). |
1850 * XXX should also check cur != hwcur on the tx rings. 1851 * Fortunately, normal tx mode has np_txpoll set. 1852 */ 1853 if (priv->np_txpoll || want_tx) { | 1884 * XXX should also check cur != hwcur on the tx rings. 1885 * Fortunately, normal tx mode has np_txpoll set. 1886 */ 1887 if (priv->np_txpoll || want_tx) { |
1854 /* If we really want to be woken up (want_tx), 1855 * do a selrecord, either on the global or on 1856 * the private structure. Then issue the txsync 1857 * so there is no race in the selrecord/selwait | 1888 /* 1889 * The first round checks if anyone is ready, if not 1890 * do a selrecord and another round to handle races. 1891 * want_tx goes to 0 if any space is found, and is 1892 * used to skip rings with no pending transmissions. |
1858 */ 1859flush_tx: 1860 for (i = priv->np_qfirst; i < lim_tx; i++) { | 1893 */ 1894flush_tx: 1895 for (i = priv->np_qfirst; i < lim_tx; i++) { |
1896 int found = 0; 1897 |
|
1861 kring = &na->tx_rings[i]; | 1898 kring = &na->tx_rings[i]; |
1862 /* 1863 * Skip this ring if want_tx == 0 1864 * (we have already done a successful sync on 1865 * a previous ring) AND kring->cur == kring->hwcur 1866 * (there are no pending transmissions for this ring). 1867 */ | |
1868 if (!want_tx && kring->ring->cur == kring->nr_hwcur) 1869 continue; | 1899 if (!want_tx && kring->ring->cur == kring->nr_hwcur) 1900 continue; |
1870 /* make sure only one user thread is doing this */ | 1901 /* only one thread does txsync */ |
1871 if (nm_kr_tryget(kring)) { | 1902 if (nm_kr_tryget(kring)) { |
1872 ND("ring %p busy is %d", 1873 kring, (int)kring->nr_busy); | 1903 D("%p lost race on txring %d, ok", priv, i); 1904 continue; 1905 } 1906 if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) { 1907 netmap_ring_reinit(kring); |
1874 revents |= POLLERR; | 1908 revents |= POLLERR; |
1875 goto out; | 1909 } else { 1910 if (na->nm_txsync(na, i, 0)) 1911 revents |= POLLERR; |
1876 } 1877 | 1912 } 1913 |
1878 if (netmap_verbose & NM_VERB_TXSYNC) 1879 D("send %d on %s %d", 1880 kring->ring->cur, NM_IFPNAME(ifp), i); 1881 if (na->nm_txsync(na, i, 0)) 1882 revents |= POLLERR; 1883 1884 /* Check avail and call selrecord only if 1885 * called with POLLOUT and run out of bufs. 1886 * XXX Note, we cannot trust much ring->avail 1887 * as it is exposed to userspace (even though 1888 * just updated by txsync). We should really 1889 * check kring->nr_hwavail or better have 1890 * txsync set a flag telling if we need 1891 * to do a selrecord(). | 1914 /* 1915 * If we found new slots, notify potential 1916 * listeners on the same ring. 1917 * Since we just did a txsync, look at the copies 1918 * of cur,tail in the kring. |
1892 */ | 1919 */ |
1893 if (want_tx) { 1894 if (kring->ring->avail > 0) { 1895 /* stop at the first ring. We don't risk 1896 * starvation. 1897 */ 1898 revents |= want_tx; 1899 want_tx = 0; 1900 } 1901 } | 1920 found = kring->rcur != kring->rtail; |
1902 nm_kr_put(kring); | 1921 nm_kr_put(kring); |
1922 if (found) { /* notify other listeners */ 1923 revents |= want_tx; 1924 want_tx = 0; 1925 na->nm_notify(na, i, NR_TX, NAF_GLOBAL_NOTIFY); 1926 } |
|
1903 } 1904 if (want_tx && retry_tx) { 1905 selrecord(td, check_all_tx ? 1906 &na->tx_si : &na->tx_rings[priv->np_qfirst].si); 1907 retry_tx = 0; 1908 goto flush_tx; 1909 } 1910 } 1911 1912 /* | 1927 } 1928 if (want_tx && retry_tx) { 1929 selrecord(td, check_all_tx ? 1930 &na->tx_si : &na->tx_rings[priv->np_qfirst].si); 1931 retry_tx = 0; 1932 goto flush_tx; 1933 } 1934 } 1935 1936 /* |
1913 * now if want_rx is still set we need to lock and rxsync. | 1937 * If want_rx is still set scan receive rings. |
1914 * Do it on all rings because otherwise we starve. 1915 */ 1916 if (want_rx) { | 1938 * Do it on all rings because otherwise we starve. 1939 */ 1940 if (want_rx) { |
1917 int retry_rx = 1; | 1941 int send_down = 0; /* transparent mode */ 1942 /* two rounds here to for race avoidance */ |
1918do_retry_rx: 1919 for (i = priv->np_qfirst; i < lim_rx; i++) { | 1943do_retry_rx: 1944 for (i = priv->np_qfirst; i < lim_rx; i++) { |
1945 int found = 0; 1946 |
|
1920 kring = &na->rx_rings[i]; 1921 1922 if (nm_kr_tryget(kring)) { | 1947 kring = &na->rx_rings[i]; 1948 1949 if (nm_kr_tryget(kring)) { |
1923 revents |= POLLERR; 1924 goto out; | 1950 D("%p lost race on rxring %d, ok", priv, i); 1951 continue; |
1925 } 1926 | 1952 } 1953 |
1927 /* XXX NR_FORWARD should only be read on | 1954 /* 1955 * transparent mode support: collect packets 1956 * from the rxring(s). 1957 * XXX NR_FORWARD should only be read on |
1928 * physical or NIC ports 1929 */ 1930 if (netmap_fwd ||kring->ring->flags & NR_FORWARD) { 1931 ND(10, "forwarding some buffers up %d to %d", 1932 kring->nr_hwcur, kring->ring->cur); 1933 netmap_grab_packets(kring, &q, netmap_fwd); 1934 } 1935 1936 if (na->nm_rxsync(na, i, 0)) 1937 revents |= POLLERR; 1938 if (netmap_no_timestamp == 0 || 1939 kring->ring->flags & NR_TIMESTAMP) { 1940 microtime(&kring->ring->ts); 1941 } | 1958 * physical or NIC ports 1959 */ 1960 if (netmap_fwd ||kring->ring->flags & NR_FORWARD) { 1961 ND(10, "forwarding some buffers up %d to %d", 1962 kring->nr_hwcur, kring->ring->cur); 1963 netmap_grab_packets(kring, &q, netmap_fwd); 1964 } 1965 1966 if (na->nm_rxsync(na, i, 0)) 1967 revents |= POLLERR; 1968 if (netmap_no_timestamp == 0 || 1969 kring->ring->flags & NR_TIMESTAMP) { 1970 microtime(&kring->ring->ts); 1971 } |
1942 1943 if (kring->ring->avail > 0) { | 1972 /* after an rxsync we can use kring->rcur, rtail */ 1973 found = kring->rcur != kring->rtail; 1974 nm_kr_put(kring); 1975 if (found) { |
1944 revents |= want_rx; 1945 retry_rx = 0; | 1976 revents |= want_rx; 1977 retry_rx = 0; |
1978 na->nm_notify(na, i, NR_RX, NAF_GLOBAL_NOTIFY); |
|
1946 } | 1979 } |
1947 nm_kr_put(kring); | |
1948 } | 1980 } |
1949 if (retry_rx) { 1950 retry_rx = 0; | 1981 1982 /* transparent mode XXX only during first pass ? */ 1983 kring = &na->rx_rings[lim_rx]; 1984 if (check_all_rx 1985 && (netmap_fwd || kring->ring->flags & NR_FORWARD)) { 1986 /* XXX fix to use kring fields */ 1987 if (nm_ring_empty(kring->ring)) 1988 send_down = netmap_rxsync_from_host(na, td, dev); 1989 if (!nm_ring_empty(kring->ring)) 1990 revents |= want_rx; 1991 } 1992 1993 if (retry_rx) |
1951 selrecord(td, check_all_rx ? 1952 &na->rx_si : &na->rx_rings[priv->np_qfirst].si); | 1994 selrecord(td, check_all_rx ? 1995 &na->rx_si : &na->rx_rings[priv->np_qfirst].si); |
1953 goto do_retry_rx; | 1996 if (send_down > 0 || retry_rx) { 1997 retry_rx = 0; 1998 if (send_down) 1999 goto flush_tx; /* and retry_rx */ 2000 else 2001 goto do_retry_rx; |
1954 } 1955 } 1956 | 2002 } 2003 } 2004 |
1957 /* forward host to the netmap ring. 1958 * I am accessing nr_hwavail without lock, but netmap_transmit 1959 * can only increment it, so the operation is safe. | 2005 /* 2006 * Transparent mode: marked bufs on rx rings between 2007 * kring->nr_hwcur and ring->head 2008 * are passed to the other endpoint. 2009 * 2010 * In this mode we also scan the sw rxring, which in 2011 * turn passes packets up. 2012 * 2013 * XXX Transparent mode at the moment requires to bind all 2014 * rings to a single file descriptor. |
1960 */ | 2015 */ |
1961 kring = &na->rx_rings[lim_rx]; 1962 if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all 1963 && (netmap_fwd || kring->ring->flags & NR_FORWARD) 1964 && kring->nr_hwavail > 0 && !host_forwarded) { 1965 netmap_sw_to_nic(na); 1966 host_forwarded = 1; /* prevent another pass */ 1967 want_rx = 0; 1968 goto flush_tx; 1969 } | |
1970 1971 if (q.head) 1972 netmap_send_up(na->ifp, &q); 1973 | 2016 2017 if (q.head) 2018 netmap_send_up(na->ifp, &q); 2019 |
1974out: 1975 | |
1976 return (revents); 1977} 1978 | 2020 return (revents); 2021} 2022 |
1979/*------- driver support routines ------*/ | |
1980 | 2023 |
2024/*-------------------- driver support routines -------------------*/ 2025 |
|
1981static int netmap_hw_krings_create(struct netmap_adapter *); 1982 1983static int | 2026static int netmap_hw_krings_create(struct netmap_adapter *); 2027 2028static int |
1984netmap_notify(struct netmap_adapter *na, u_int n_ring, enum txrx tx, int flags) | 2029netmap_notify(struct netmap_adapter *na, u_int n_ring, 2030 enum txrx tx, int flags) |
1985{ 1986 struct netmap_kring *kring; 1987 1988 if (tx == NR_TX) { 1989 kring = na->tx_rings + n_ring; 1990 selwakeuppri(&kring->si, PI_NET); 1991 if (flags & NAF_GLOBAL_NOTIFY) 1992 selwakeuppri(&na->tx_si, PI_NET); --- 14 unchanged lines hidden (view full) --- 2007 struct ifnet *ifp = na->ifp; 2008 2009 if (na->num_tx_rings == 0 || na->num_rx_rings == 0) { 2010 D("%s: invalid rings tx %d rx %d", 2011 ifp->if_xname, na->num_tx_rings, na->num_rx_rings); 2012 return EINVAL; 2013 } 2014 WNA(ifp) = na; | 2031{ 2032 struct netmap_kring *kring; 2033 2034 if (tx == NR_TX) { 2035 kring = na->tx_rings + n_ring; 2036 selwakeuppri(&kring->si, PI_NET); 2037 if (flags & NAF_GLOBAL_NOTIFY) 2038 selwakeuppri(&na->tx_si, PI_NET); --- 14 unchanged lines hidden (view full) --- 2053 struct ifnet *ifp = na->ifp; 2054 2055 if (na->num_tx_rings == 0 || na->num_rx_rings == 0) { 2056 D("%s: invalid rings tx %d rx %d", 2057 ifp->if_xname, na->num_tx_rings, na->num_rx_rings); 2058 return EINVAL; 2059 } 2060 WNA(ifp) = na; |
2061 2062 /* the following is only needed for na that use the host port. 2063 * XXX do we have something similar for linux ? 2064 */ 2065#ifdef __FreeBSD__ 2066 na->if_input = ifp->if_input; /* for netmap_send_up */ 2067#endif /* __FreeBSD__ */ 2068 |
|
2015 NETMAP_SET_CAPABLE(ifp); 2016 if (na->nm_krings_create == NULL) { 2017 na->nm_krings_create = netmap_hw_krings_create; | 2069 NETMAP_SET_CAPABLE(ifp); 2070 if (na->nm_krings_create == NULL) { 2071 na->nm_krings_create = netmap_hw_krings_create; |
2018 na->nm_krings_delete = netmap_krings_delete; | 2072 na->nm_krings_delete = netmap_hw_krings_delete; |
2019 } 2020 if (na->nm_notify == NULL) 2021 na->nm_notify = netmap_notify; 2022 na->active_fds = 0; 2023 2024 if (na->nm_mem == NULL) 2025 na->nm_mem = &nm_mem; 2026 return 0; --- 19 unchanged lines hidden (view full) --- 2046 2047/* 2048 * Initialize a ``netmap_adapter`` object created by driver on attach. 2049 * We allocate a block of memory with room for a struct netmap_adapter 2050 * plus two sets of N+2 struct netmap_kring (where N is the number 2051 * of hardware rings): 2052 * krings 0..N-1 are for the hardware queues. 2053 * kring N is for the host stack queue | 2073 } 2074 if (na->nm_notify == NULL) 2075 na->nm_notify = netmap_notify; 2076 na->active_fds = 0; 2077 2078 if (na->nm_mem == NULL) 2079 na->nm_mem = &nm_mem; 2080 return 0; --- 19 unchanged lines hidden (view full) --- 2100 2101/* 2102 * Initialize a ``netmap_adapter`` object created by driver on attach. 2103 * We allocate a block of memory with room for a struct netmap_adapter 2104 * plus two sets of N+2 struct netmap_kring (where N is the number 2105 * of hardware rings): 2106 * krings 0..N-1 are for the hardware queues. 2107 * kring N is for the host stack queue |
2054 * kring N+1 is only used for the selinfo for all queues. | 2108 * kring N+1 is only used for the selinfo for all queues. // XXX still true ? |
2055 * Return 0 on success, ENOMEM otherwise. | 2109 * Return 0 on success, ENOMEM otherwise. |
2056 * 2057 * By default the receive and transmit adapter ring counts are both initialized 2058 * to num_queues. na->num_tx_rings can be set for cards with different tx/rx 2059 * setups. | |
2060 */ 2061int 2062netmap_attach(struct netmap_adapter *arg) 2063{ 2064 struct netmap_hw_adapter *hwna = NULL; 2065 // XXX when is arg == NULL ? 2066 struct ifnet *ifp = arg ? arg->ifp : NULL; 2067 --- 59 unchanged lines hidden (view full) --- 2127 2128 return 1; 2129} 2130 2131 2132int 2133netmap_hw_krings_create(struct netmap_adapter *na) 2134{ | 2110 */ 2111int 2112netmap_attach(struct netmap_adapter *arg) 2113{ 2114 struct netmap_hw_adapter *hwna = NULL; 2115 // XXX when is arg == NULL ? 2116 struct ifnet *ifp = arg ? arg->ifp : NULL; 2117 --- 59 unchanged lines hidden (view full) --- 2177 2178 return 1; 2179} 2180 2181 2182int 2183netmap_hw_krings_create(struct netmap_adapter *na) 2184{ |
2135 return netmap_krings_create(na, | 2185 int ret = netmap_krings_create(na, |
2136 na->num_tx_rings + 1, na->num_rx_rings + 1, 0); | 2186 na->num_tx_rings + 1, na->num_rx_rings + 1, 0); |
2187 if (ret == 0) { 2188 /* initialize the mbq for the sw rx ring */ 2189 mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue); 2190 ND("initialized sw rx queue %d", na->num_rx_rings); 2191 } 2192 return ret; |
|
2137} 2138 2139 2140 2141/* 2142 * Free the allocated memory linked to the given ``netmap_adapter`` 2143 * object. 2144 */ --- 12 unchanged lines hidden (view full) --- 2157 netmap_enable_all_rings(ifp); 2158 NMG_UNLOCK(); 2159} 2160 2161 2162/* 2163 * Intercept packets from the network stack and pass them 2164 * to netmap as incoming packets on the 'software' ring. | 2193} 2194 2195 2196 2197/* 2198 * Free the allocated memory linked to the given ``netmap_adapter`` 2199 * object. 2200 */ --- 12 unchanged lines hidden (view full) --- 2213 netmap_enable_all_rings(ifp); 2214 NMG_UNLOCK(); 2215} 2216 2217 2218/* 2219 * Intercept packets from the network stack and pass them 2220 * to netmap as incoming packets on the 'software' ring. |
2221 * 2222 * We only store packets in a bounded mbq and then copy them 2223 * in the relevant rxsync routine. 2224 * |
|
2165 * We rely on the OS to make sure that the ifp and na do not go 2166 * away (typically the caller checks for IFF_DRV_RUNNING or the like). 2167 * In nm_register() or whenever there is a reinitialization, 2168 * we make sure to make the mode change visible here. 2169 */ 2170int 2171netmap_transmit(struct ifnet *ifp, struct mbuf *m) 2172{ 2173 struct netmap_adapter *na = NA(ifp); 2174 struct netmap_kring *kring; | 2225 * We rely on the OS to make sure that the ifp and na do not go 2226 * away (typically the caller checks for IFF_DRV_RUNNING or the like). 2227 * In nm_register() or whenever there is a reinitialization, 2228 * we make sure to make the mode change visible here. 2229 */ 2230int 2231netmap_transmit(struct ifnet *ifp, struct mbuf *m) 2232{ 2233 struct netmap_adapter *na = NA(ifp); 2234 struct netmap_kring *kring; |
2175 u_int i, len = MBUF_LEN(m); 2176 u_int error = EBUSY, lim; 2177 struct netmap_slot *slot; | 2235 u_int len = MBUF_LEN(m); 2236 u_int error = ENOBUFS; 2237 struct mbq *q; 2238 int space; |
2178 2179 // XXX [Linux] we do not need this lock 2180 // if we follow the down/configure/up protocol -gl 2181 // mtx_lock(&na->core_lock); | 2239 2240 // XXX [Linux] we do not need this lock 2241 // if we follow the down/configure/up protocol -gl 2242 // mtx_lock(&na->core_lock); |
2243 |
|
2182 if ( (ifp->if_capenable & IFCAP_NETMAP) == 0) { | 2244 if ( (ifp->if_capenable & IFCAP_NETMAP) == 0) { |
2183 /* interface not in netmap mode anymore */ | 2245 D("%s not in netmap mode anymore", NM_IFPNAME(ifp)); |
2184 error = ENXIO; 2185 goto done; 2186 } 2187 2188 kring = &na->rx_rings[na->num_rx_rings]; | 2246 error = ENXIO; 2247 goto done; 2248 } 2249 2250 kring = &na->rx_rings[na->num_rx_rings]; |
2189 lim = kring->nkr_num_slots - 1; 2190 if (netmap_verbose & NM_VERB_HOST) 2191 D("%s packet %d len %d from the stack", NM_IFPNAME(ifp), 2192 kring->nr_hwcur + kring->nr_hwavail, len); | 2251 q = &kring->rx_queue; 2252 |
2193 // XXX reconsider long packets if we handle fragments 2194 if (len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { /* too long for us */ 2195 D("%s from_host, drop packet size %d > %d", NM_IFPNAME(ifp), 2196 len, NETMAP_BDG_BUF_SIZE(na->nm_mem)); 2197 goto done; 2198 } | 2253 // XXX reconsider long packets if we handle fragments 2254 if (len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { /* too long for us */ 2255 D("%s from_host, drop packet size %d > %d", NM_IFPNAME(ifp), 2256 len, NETMAP_BDG_BUF_SIZE(na->nm_mem)); 2257 goto done; 2258 } |
2199 /* protect against other instances of netmap_transmit, 2200 * and userspace invocations of rxsync(). | 2259 2260 /* protect against rxsync_from_host(), netmap_sw_to_nic() 2261 * and maybe other instances of netmap_transmit (the latter 2262 * not possible on Linux). 2263 * Also avoid overflowing the queue. |
2201 */ | 2264 */ |
2202 // XXX [Linux] there can be no other instances of netmap_transmit 2203 // on this same ring, but we still need this lock to protect 2204 // concurrent access from netmap_sw_to_nic() -gl 2205 mtx_lock(&kring->q_lock); 2206 if (kring->nr_hwavail >= lim) { 2207 if (netmap_verbose) 2208 D("stack ring %s full\n", NM_IFPNAME(ifp)); | 2265 mtx_lock(&q->lock); 2266 2267 space = kring->nr_hwtail - kring->nr_hwcur; 2268 if (space < 0) 2269 space += kring->nkr_num_slots; 2270 if (space + mbq_len(q) >= kring->nkr_num_slots - 1) { // XXX 2271 RD(10, "%s full hwcur %d hwtail %d qlen %d len %d m %p", 2272 NM_IFPNAME(ifp), kring->nr_hwcur, kring->nr_hwtail, mbq_len(q), 2273 len, m); |
2209 } else { | 2274 } else { |
2210 /* compute the insert position */ 2211 i = nm_kr_rxpos(kring); 2212 slot = &kring->ring->slot[i]; 2213 m_copydata(m, 0, (int)len, BDG_NMB(na, slot)); 2214 slot->len = len; 2215 slot->flags = kring->nkr_slot_flags; 2216 kring->nr_hwavail++; 2217 if (netmap_verbose & NM_VERB_HOST) 2218 D("wake up host ring %s %d", NM_IFPNAME(na->ifp), na->num_rx_rings); 2219 na->nm_notify(na, na->num_rx_rings, NR_RX, 0); | 2275 mbq_enqueue(q, m); 2276 ND(10, "%s %d bufs in queue len %d m %p", 2277 NM_IFPNAME(ifp), mbq_len(q), len, m); 2278 /* notify outside the lock */ 2279 m = NULL; |
2220 error = 0; 2221 } | 2280 error = 0; 2281 } |
2222 mtx_unlock(&kring->q_lock); | 2282 mtx_unlock(&q->lock); |
2223 2224done: | 2283 2284done: |
2225 // mtx_unlock(&na->core_lock); | 2285 if (m) 2286 m_freem(m); 2287 /* unconditionally wake up listeners */ 2288 na->nm_notify(na, na->num_rx_rings, NR_RX, 0); |
2226 | 2289 |
2227 /* release the mbuf in either cases of success or failure. As an 2228 * alternative, put the mbuf in a free list and free the list 2229 * only when really necessary. 2230 */ 2231 m_freem(m); 2232 | |
2233 return (error); 2234} 2235 2236 2237/* 2238 * netmap_reset() is called by the driver routines when reinitializing 2239 * a ring. The driver is in charge of locking to protect the kring. 2240 * If native netmap mode is not set just return NULL. --- 21 unchanged lines hidden (view full) --- 2262 * - set a RESET flag somewhere in the kring 2263 * - do the processing in a conservative way 2264 * - let the *sync() fixup at the end. 2265 */ 2266 if (tx == NR_TX) { 2267 if (n >= na->num_tx_rings) 2268 return NULL; 2269 kring = na->tx_rings + n; | 2290 return (error); 2291} 2292 2293 2294/* 2295 * netmap_reset() is called by the driver routines when reinitializing 2296 * a ring. The driver is in charge of locking to protect the kring. 2297 * If native netmap mode is not set just return NULL. --- 21 unchanged lines hidden (view full) --- 2319 * - set a RESET flag somewhere in the kring 2320 * - do the processing in a conservative way 2321 * - let the *sync() fixup at the end. 2322 */ 2323 if (tx == NR_TX) { 2324 if (n >= na->num_tx_rings) 2325 return NULL; 2326 kring = na->tx_rings + n; |
2327 // XXX check whether we should use hwcur or rcur |
|
2270 new_hwofs = kring->nr_hwcur - new_cur; 2271 } else { 2272 if (n >= na->num_rx_rings) 2273 return NULL; 2274 kring = na->rx_rings + n; | 2328 new_hwofs = kring->nr_hwcur - new_cur; 2329 } else { 2330 if (n >= na->num_rx_rings) 2331 return NULL; 2332 kring = na->rx_rings + n; |
2275 new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur; | 2333 new_hwofs = kring->nr_hwtail - new_cur; |
2276 } 2277 lim = kring->nkr_num_slots - 1; 2278 if (new_hwofs > lim) 2279 new_hwofs -= lim + 1; 2280 2281 /* Always set the new offset value and realign the ring. */ | 2334 } 2335 lim = kring->nkr_num_slots - 1; 2336 if (new_hwofs > lim) 2337 new_hwofs -= lim + 1; 2338 2339 /* Always set the new offset value and realign the ring. */ |
2282 D("%s hwofs %d -> %d, hwavail %d -> %d", 2283 tx == NR_TX ? "TX" : "RX", | 2340 if (netmap_verbose) 2341 D("%s %s%d hwofs %d -> %d, hwtail %d -> %d", 2342 NM_IFPNAME(na->ifp), 2343 tx == NR_TX ? "TX" : "RX", n, |
2284 kring->nkr_hwofs, new_hwofs, | 2344 kring->nkr_hwofs, new_hwofs, |
2285 kring->nr_hwavail, 2286 tx == NR_TX ? lim : kring->nr_hwavail); | 2345 kring->nr_hwtail, 2346 tx == NR_TX ? lim : kring->nr_hwtail); |
2287 kring->nkr_hwofs = new_hwofs; | 2347 kring->nkr_hwofs = new_hwofs; |
2288 if (tx == NR_TX) 2289 kring->nr_hwavail = lim; 2290 kring->nr_hwreserved = 0; | 2348 if (tx == NR_TX) { 2349 kring->nr_hwtail = kring->nr_hwcur + lim; 2350 if (kring->nr_hwtail > lim) 2351 kring->nr_hwtail -= lim + 1; 2352 } |
2291 2292#if 0 // def linux 2293 /* XXX check that the mappings are correct */ 2294 /* need ring_nr, adapter->pdev, direction */ 2295 buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE); 2296 if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) { 2297 D("error mapping rx netmap buffer %d", i); 2298 // XXX fix error handling --- 47 unchanged lines hidden (view full) --- 2346 if (q >= na->num_tx_rings) 2347 return; // not a physical queue 2348 kring = na->tx_rings + q; 2349 na->nm_notify(na, q, NR_TX, 2350 (na->num_tx_rings > 1 ? NAF_GLOBAL_NOTIFY : 0)); 2351 } 2352} 2353 | 2353 2354#if 0 // def linux 2355 /* XXX check that the mappings are correct */ 2356 /* need ring_nr, adapter->pdev, direction */ 2357 buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE); 2358 if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) { 2359 D("error mapping rx netmap buffer %d", i); 2360 // XXX fix error handling --- 47 unchanged lines hidden (view full) --- 2408 if (q >= na->num_tx_rings) 2409 return; // not a physical queue 2410 kring = na->tx_rings + q; 2411 na->nm_notify(na, q, NR_TX, 2412 (na->num_tx_rings > 1 ? NAF_GLOBAL_NOTIFY : 0)); 2413 } 2414} 2415 |
2416 |
|
2354/* 2355 * Default functions to handle rx/tx interrupts from a physical device. 2356 * "work_done" is non-null on the RX path, NULL for the TX path. 2357 * 2358 * If the card is not in netmap mode, simply return 0, 2359 * so that the caller proceeds with regular processing. 2360 * Otherwise call netmap_common_irq() and return 1. 2361 * --- 30 unchanged lines hidden (view full) --- 2392 * (but there is no chance) 2393 * 2394 * netmap_fini() destroys everything. 2395 */ 2396 2397static struct cdev *netmap_dev; /* /dev/netmap character device. */ 2398extern struct cdevsw netmap_cdevsw; 2399 | 2417/* 2418 * Default functions to handle rx/tx interrupts from a physical device. 2419 * "work_done" is non-null on the RX path, NULL for the TX path. 2420 * 2421 * If the card is not in netmap mode, simply return 0, 2422 * so that the caller proceeds with regular processing. 2423 * Otherwise call netmap_common_irq() and return 1. 2424 * --- 30 unchanged lines hidden (view full) --- 2455 * (but there is no chance) 2456 * 2457 * netmap_fini() destroys everything. 2458 */ 2459 2460static struct cdev *netmap_dev; /* /dev/netmap character device. */ 2461extern struct cdevsw netmap_cdevsw; 2462 |
2463 |
|
2400void 2401netmap_fini(void) 2402{ 2403 // XXX destroy_bridges() ? 2404 if (netmap_dev) 2405 destroy_dev(netmap_dev); 2406 netmap_mem_fini(); 2407 NMG_LOCK_DESTROY(); 2408 printf("netmap: unloaded module.\n"); 2409} 2410 | 2464void 2465netmap_fini(void) 2466{ 2467 // XXX destroy_bridges() ? 2468 if (netmap_dev) 2469 destroy_dev(netmap_dev); 2470 netmap_mem_fini(); 2471 NMG_LOCK_DESTROY(); 2472 printf("netmap: unloaded module.\n"); 2473} 2474 |
2475 |
|
2411int 2412netmap_init(void) 2413{ 2414 int error; 2415 2416 NMG_LOCK_INIT(); 2417 2418 error = netmap_mem_init(); --- 15 unchanged lines hidden --- | 2476int 2477netmap_init(void) 2478{ 2479 int error; 2480 2481 NMG_LOCK_INIT(); 2482 2483 error = netmap_mem_init(); --- 15 unchanged lines hidden --- |