netmap.c (259538) vs. netmap.c (260368)
1/*
1/*
2 * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the

--- 9 unchanged lines hidden (view full) ---

20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26
27/*
28 * $FreeBSD: head/sys/dev/netmap/netmap.c 259538 2013-12-18 04:36:35Z glebius $
28 * $FreeBSD: head/sys/dev/netmap/netmap.c 260368 2014-01-06 12:53:15Z luigi $
29 *
30 * This module supports memory mapped access to network devices,
31 * see netmap(4).
32 *
33 * The module uses a large, memory pool allocated by the kernel
34 * and accessible as mmapped memory by multiple userspace threads/processes.
35 * The memory pool contains packet buffers and "netmap rings",
36 * i.e. user-accessible copies of the interface's queues.

--- 109 unchanged lines hidden (view full) ---

146#include <sys/selinfo.h>
147#include <sys/sysctl.h>
148#include <net/if.h>
149#include <net/if_var.h>
150#include <net/bpf.h> /* BIOCIMMEDIATE */
151#include <machine/bus.h> /* bus_dmamap_* */
152#include <sys/endian.h>
153#include <sys/refcount.h>
154#include <sys/jail.h>
155
156
157/* reduce conditional code */
158#define init_waitqueue_head(x) // only needed in linux
159
160
161
162#elif defined(linux)

--- 58 unchanged lines hidden (view full) ---

221/*
222 * netmap_admode selects the netmap mode to use.
223 * Invalid values are reset to NETMAP_ADMODE_BEST
224 */
225enum { NETMAP_ADMODE_BEST = 0, /* use native, fallback to generic */
226 NETMAP_ADMODE_NATIVE, /* either native or none */
227 NETMAP_ADMODE_GENERIC, /* force generic */
228 NETMAP_ADMODE_LAST };
154
155
156/* reduce conditional code */
157#define init_waitqueue_head(x) // only needed in linux
158
159
160
161#elif defined(linux)

--- 58 unchanged lines hidden (view full) ---

220/*
221 * netmap_admode selects the netmap mode to use.
222 * Invalid values are reset to NETMAP_ADMODE_BEST
223 */
224enum { NETMAP_ADMODE_BEST = 0, /* use native, fallback to generic */
225 NETMAP_ADMODE_NATIVE, /* either native or none */
226 NETMAP_ADMODE_GENERIC, /* force generic */
227 NETMAP_ADMODE_LAST };
229#define NETMAP_ADMODE_NATIVE 1 /* Force native netmap adapter. */
230#define NETMAP_ADMODE_GENERIC 2 /* Force generic netmap adapter. */
231#define NETMAP_ADMODE_BEST 0 /* Priority to native netmap adapter. */
232static int netmap_admode = NETMAP_ADMODE_BEST;
233
234int netmap_generic_mit = 100*1000; /* Generic mitigation interval in nanoseconds. */
235int netmap_generic_ringsize = 1024; /* Generic ringsize. */
236
237SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
238SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
239SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, "");

--- 7 unchanged lines hidden (view full) ---

247static void
248nm_kr_get(struct netmap_kring *kr)
249{
250 while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
251 tsleep(kr, 0, "NM_KR_GET", 4);
252}
253
254
228static int netmap_admode = NETMAP_ADMODE_BEST;
229
230int netmap_generic_mit = 100*1000; /* Generic mitigation interval in nanoseconds. */
231int netmap_generic_ringsize = 1024; /* Generic ringsize. */
232
233SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
234SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
235SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, "");

--- 7 unchanged lines hidden (view full) ---

243static void
244nm_kr_get(struct netmap_kring *kr)
245{
246 while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
247 tsleep(kr, 0, "NM_KR_GET", 4);
248}
249
250
251/*
252 * mark the ring as stopped, and run through the locks
253 * to make sure other users get to see it.
254 */
255void
256netmap_disable_ring(struct netmap_kring *kr)
257{
258 kr->nkr_stopped = 1;
259 nm_kr_get(kr);
260 mtx_lock(&kr->q_lock);
261 mtx_unlock(&kr->q_lock);
262 nm_kr_put(kr);

--- 112 unchanged lines hidden (view full) ---

375 *o = '\0';
376#undef P_HI
377#undef P_LO
378#undef P_C
379 return dst;
380}
381
382
255void
256netmap_disable_ring(struct netmap_kring *kr)
257{
258 kr->nkr_stopped = 1;
259 nm_kr_get(kr);
260 mtx_lock(&kr->q_lock);
261 mtx_unlock(&kr->q_lock);
262 nm_kr_put(kr);

--- 112 unchanged lines hidden (view full) ---

375 *o = '\0';
376#undef P_HI
377#undef P_LO
378#undef P_C
379 return dst;
380}
381
382
383
384/*
385 * Fetch configuration from the device, to cope with dynamic
386 * reconfigurations after loading the module.
387 */
388int
389netmap_update_config(struct netmap_adapter *na)
390{
391 struct ifnet *ifp = na->ifp;

--- 35 unchanged lines hidden (view full) ---

427
428
429int
430netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tailroom)
431{
432 u_int i, len, ndesc;
433 struct netmap_kring *kring;
434
383/*
384 * Fetch configuration from the device, to cope with dynamic
385 * reconfigurations after loading the module.
386 */
387int
388netmap_update_config(struct netmap_adapter *na)
389{
390 struct ifnet *ifp = na->ifp;

--- 35 unchanged lines hidden (view full) ---

426
427
428int
429netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tailroom)
430{
431 u_int i, len, ndesc;
432 struct netmap_kring *kring;
433
434 // XXX additional space for extra rings ?
435 len = (ntx + nrx) * sizeof(struct netmap_kring) + tailroom;
436
437 na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
438 if (na->tx_rings == NULL) {
439 D("Cannot allocate krings");
440 return ENOMEM;
441 }
442 na->rx_rings = na->tx_rings + ntx;
443
435 len = (ntx + nrx) * sizeof(struct netmap_kring) + tailroom;
436
437 na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
438 if (na->tx_rings == NULL) {
439 D("Cannot allocate krings");
440 return ENOMEM;
441 }
442 na->rx_rings = na->tx_rings + ntx;
443
444 /*
445 * All fields in krings are 0 except the one initialized below.
446 * but better be explicit on important kring fields.
447 */
444 ndesc = na->num_tx_desc;
445 for (i = 0; i < ntx; i++) { /* Transmit rings */
446 kring = &na->tx_rings[i];
447 bzero(kring, sizeof(*kring));
448 kring->na = na;
448 ndesc = na->num_tx_desc;
449 for (i = 0; i < ntx; i++) { /* Transmit rings */
450 kring = &na->tx_rings[i];
451 bzero(kring, sizeof(*kring));
452 kring->na = na;
453 kring->ring_id = i;
449 kring->nkr_num_slots = ndesc;
450 /*
454 kring->nkr_num_slots = ndesc;
455 /*
451 * IMPORTANT:
452 * Always keep one slot empty, so we can detect new
453 * transmissions comparing cur and nr_hwcur (they are
454 * the same only if there are no new transmissions).
456 * IMPORTANT: Always keep one slot empty.
455 */
457 */
456 kring->nr_hwavail = ndesc - 1;
458 kring->rhead = kring->rcur = kring->nr_hwcur = 0;
459 kring->rtail = kring->nr_hwtail = ndesc - 1;
460 snprintf(kring->name, sizeof(kring->name) - 1, "%s TX%d", NM_IFPNAME(na->ifp), i);
457 mtx_init(&kring->q_lock, "nm_txq_lock", NULL, MTX_DEF);
458 init_waitqueue_head(&kring->si);
459 }
460
461 ndesc = na->num_rx_desc;
462 for (i = 0; i < nrx; i++) { /* Receive rings */
463 kring = &na->rx_rings[i];
464 bzero(kring, sizeof(*kring));
465 kring->na = na;
461 mtx_init(&kring->q_lock, "nm_txq_lock", NULL, MTX_DEF);
462 init_waitqueue_head(&kring->si);
463 }
464
465 ndesc = na->num_rx_desc;
466 for (i = 0; i < nrx; i++) { /* Receive rings */
467 kring = &na->rx_rings[i];
468 bzero(kring, sizeof(*kring));
469 kring->na = na;
470 kring->ring_id = i;
466 kring->nkr_num_slots = ndesc;
471 kring->nkr_num_slots = ndesc;
472 kring->rhead = kring->rcur = kring->nr_hwcur = 0;
473 kring->rtail = kring->nr_hwtail = 0;
474 snprintf(kring->name, sizeof(kring->name) - 1, "%s RX%d", NM_IFPNAME(na->ifp), i);
467 mtx_init(&kring->q_lock, "nm_rxq_lock", NULL, MTX_DEF);
468 init_waitqueue_head(&kring->si);
469 }
470 init_waitqueue_head(&na->tx_si);
471 init_waitqueue_head(&na->rx_si);
472
473 na->tailroom = na->rx_rings + nrx;
474
475 return 0;
475 mtx_init(&kring->q_lock, "nm_rxq_lock", NULL, MTX_DEF);
476 init_waitqueue_head(&kring->si);
477 }
478 init_waitqueue_head(&na->tx_si);
479 init_waitqueue_head(&na->rx_si);
480
481 na->tailroom = na->rx_rings + nrx;
482
483 return 0;
476
477}
478
479
484}
485
486
487/* XXX check boundaries */
480void
481netmap_krings_delete(struct netmap_adapter *na)
482{
483 int i;
484
485 for (i = 0; i < na->num_tx_rings + 1; i++) {
486 mtx_destroy(&na->tx_rings[i].q_lock);
487 }
488 for (i = 0; i < na->num_rx_rings + 1; i++) {
489 mtx_destroy(&na->rx_rings[i].q_lock);
490 }
491 free(na->tx_rings, M_DEVBUF);
492 na->tx_rings = na->rx_rings = na->tailroom = NULL;
493}
494
495
488void
489netmap_krings_delete(struct netmap_adapter *na)
490{
491 int i;
492
493 for (i = 0; i < na->num_tx_rings + 1; i++) {
494 mtx_destroy(&na->tx_rings[i].q_lock);
495 }
496 for (i = 0; i < na->num_rx_rings + 1; i++) {
497 mtx_destroy(&na->rx_rings[i].q_lock);
498 }
499 free(na->tx_rings, M_DEVBUF);
500 na->tx_rings = na->rx_rings = na->tailroom = NULL;
501}
502
503
504/*
505 * Destructor for NIC ports. They also have an mbuf queue
506 * on the rings connected to the host so we need to purge
507 * them first.
508 */
509static void
510netmap_hw_krings_delete(struct netmap_adapter *na)
511{
512 struct mbq *q = &na->rx_rings[na->num_rx_rings].rx_queue;
513
514 ND("destroy sw mbq with len %d", mbq_len(q));
515 mbq_purge(q);
516 mbq_safe_destroy(q);
517 netmap_krings_delete(na);
518}
519
520
496static struct netmap_if*
497netmap_if_new(const char *ifname, struct netmap_adapter *na)
498{
499 struct netmap_if *nifp;
500
501 if (netmap_update_config(na)) {
502 /* configuration mismatch, report and fail */
503 return NULL;

--- 212 unchanged lines hidden (view full) ---

716 * into mbufs and we are done. The host --> NIC side is slightly
717 * harder because there might not be room in the tx ring so it
718 * might take a while before releasing the buffer.
719 */
720
721
722/*
723 * pass a chain of buffers to the host stack as coming from 'dst'
521static struct netmap_if*
522netmap_if_new(const char *ifname, struct netmap_adapter *na)
523{
524 struct netmap_if *nifp;
525
526 if (netmap_update_config(na)) {
527 /* configuration mismatch, report and fail */
528 return NULL;

--- 212 unchanged lines hidden (view full) ---

741 * into mbufs and we are done. The host --> NIC side is slightly
742 * harder because there might not be room in the tx ring so it
743 * might take a while before releasing the buffer.
744 */
745
746
747/*
748 * pass a chain of buffers to the host stack as coming from 'dst'
749 * We do not need to lock because the queue is private.
724 */
725static void
726netmap_send_up(struct ifnet *dst, struct mbq *q)
727{
728 struct mbuf *m;
729
730 /* send packets up, outside the lock */
731 while ((m = mbq_dequeue(q)) != NULL) {
732 if (netmap_verbose & NM_VERB_HOST)
733 D("sending up pkt %p size %d", m, MBUF_LEN(m));
734 NM_SEND_UP(dst, m);
735 }
736 mbq_destroy(q);
737}
738
739
740/*
741 * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
750 */
751static void
752netmap_send_up(struct ifnet *dst, struct mbq *q)
753{
754 struct mbuf *m;
755
756 /* send packets up, outside the lock */
757 while ((m = mbq_dequeue(q)) != NULL) {
758 if (netmap_verbose & NM_VERB_HOST)
759 D("sending up pkt %p size %d", m, MBUF_LEN(m));
760 NM_SEND_UP(dst, m);
761 }
762 mbq_destroy(q);
763}
764
765
766/*
767 * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
742 * Run from hwcur to cur - reserved
768 * Take packets from hwcur to ring->head marked NS_FORWARD (or forced)
769 * and pass them up. Drop remaining packets in the unlikely event
770 * of an mbuf shortage.
743 */
744static void
745netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
746{
771 */
772static void
773netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
774{
747 /* Take packets from hwcur to cur-reserved and pass them up.
748 * In case of no buffers we give up. At the end of the loop,
749 * the queue is drained in all cases.
750 * XXX handle reserved
751 */
752 u_int lim = kring->nkr_num_slots - 1;
753 struct mbuf *m;
754 u_int k = kring->ring->cur, n = kring->ring->reserved;
775 u_int const lim = kring->nkr_num_slots - 1;
776 u_int const head = kring->ring->head;
777 u_int n;
755 struct netmap_adapter *na = kring->na;
756
778 struct netmap_adapter *na = kring->na;
779
757 /* compute the final position, ring->cur - ring->reserved */
758 if (n > 0) {
759 if (k < n)
760 k += kring->nkr_num_slots;
761 k += n;
762 }
763 for (n = kring->nr_hwcur; n != k;) {
780 for (n = kring->nr_hwcur; n != head; n = nm_next(n, lim)) {
781 struct mbuf *m;
764 struct netmap_slot *slot = &kring->ring->slot[n];
765
782 struct netmap_slot *slot = &kring->ring->slot[n];
783
766 n = nm_next(n, lim);
767 if ((slot->flags & NS_FORWARD) == 0 && !force)
768 continue;
769 if (slot->len < 14 || slot->len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) {
784 if ((slot->flags & NS_FORWARD) == 0 && !force)
785 continue;
786 if (slot->len < 14 || slot->len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) {
770 D("bad pkt at %d len %d", n, slot->len);
787 RD(5, "bad pkt at %d len %d", n, slot->len);
771 continue;
772 }
773 slot->flags &= ~NS_FORWARD; // XXX needed ?
788 continue;
789 }
790 slot->flags &= ~NS_FORWARD; // XXX needed ?
774 /* XXX adapt to the case of a multisegment packet */
791 /* XXX TODO: adapt to the case of a multisegment packet */
775 m = m_devget(BDG_NMB(na, slot), slot->len, 0, na->ifp, NULL);
776
777 if (m == NULL)
778 break;
779 mbq_enqueue(q, m);
780 }
781}
782
783
784/*
792 m = m_devget(BDG_NMB(na, slot), slot->len, 0, na->ifp, NULL);
793
794 if (m == NULL)
795 break;
796 mbq_enqueue(q, m);
797 }
798}
799
800
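/*
 * Illustrative aside (not code from netmap.c): NS_FORWARD is the per-slot
 * opt-in for "transparent mode". A sketch of the userspace side, using the
 * NR_FORWARD/NS_FORWARD flags from <net/netmap.h>; setting the dev.netmap.fwd
 * sysctl (netmap_fwd above) forwards every released slot regardless of flags.
 */
#if 0	/* sketch only: 'ring' and 'slot' are whatever the application holds */
	ring->flags |= NR_FORWARD;	/* allow forwarding on this rx ring */
	slot->flags |= NS_FORWARD;	/* pass this slot to the other side on the next sync */
#endif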
801/*
785 * The host ring has packets from nr_hwcur to (cur - reserved)
786 * to be sent down to the NIC.
787 * We need to use the queue lock on the source (host RX ring)
788 * to protect against netmap_transmit.
789 * If the user is well behaved we do not need to acquire locks
790 * on the destination(s),
791 * so we only need to make sure that there are no panics because
792 * of user errors.
793 * XXX verify
794 *
795 * We scan the tx rings, which have just been
796 * flushed so nr_hwcur == cur. Pushing packets down means
797 * increment cur and decrement avail.
798 * XXX to be verified
802 * Send to the NIC rings packets marked NS_FORWARD between
803 * kring->nr_hwcur and kring->rhead
804 * Called under kring->rx_queue.lock on the sw rx ring,
799 */
805 */
800static void
806static u_int
801netmap_sw_to_nic(struct netmap_adapter *na)
802{
803 struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
807netmap_sw_to_nic(struct netmap_adapter *na)
808{
809 struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
804 struct netmap_kring *k1 = &na->tx_rings[0];
805 u_int i, howmany, src_lim, dst_lim;
810 struct netmap_slot *rxslot = kring->ring->slot;
811 u_int i, rxcur = kring->nr_hwcur;
812 u_int const head = kring->rhead;
813 u_int const src_lim = kring->nkr_num_slots - 1;
814 u_int sent = 0;
806
815
807 /* XXX we should also check that the carrier is on */
808 if (kring->nkr_stopped)
809 return;
816 /* scan rings to find space, then fill as much as possible */
817 for (i = 0; i < na->num_tx_rings; i++) {
818 struct netmap_kring *kdst = &na->tx_rings[i];
819 struct netmap_ring *rdst = kdst->ring;
820 u_int const dst_lim = kdst->nkr_num_slots - 1;
810
821
811 mtx_lock(&kring->q_lock);
822 /* XXX do we trust ring or kring->rcur,rtail ? */
823 for (; rxcur != head && !nm_ring_empty(rdst);
824 rxcur = nm_next(rxcur, src_lim) ) {
825 struct netmap_slot *src, *dst, tmp;
826 u_int dst_cur = rdst->cur;
812
827
813 if (kring->nkr_stopped)
814 goto out;
828 src = &rxslot[rxcur];
829 if ((src->flags & NS_FORWARD) == 0 && !netmap_fwd)
830 continue;
815
831
816 howmany = kring->nr_hwavail; /* XXX otherwise cur - reserved - nr_hwcur */
832 sent++;
817
833
818 src_lim = kring->nkr_num_slots - 1;
819 for (i = 0; howmany > 0 && i < na->num_tx_rings; i++, k1++) {
820 ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail);
821 dst_lim = k1->nkr_num_slots - 1;
822 while (howmany > 0 && k1->ring->avail > 0) {
823 struct netmap_slot *src, *dst, tmp;
824 src = &kring->ring->slot[kring->nr_hwcur];
825 dst = &k1->ring->slot[k1->ring->cur];
834 dst = &rdst->slot[dst_cur];
835
826 tmp = *src;
836 tmp = *src;
837
827 src->buf_idx = dst->buf_idx;
828 src->flags = NS_BUF_CHANGED;
829
830 dst->buf_idx = tmp.buf_idx;
831 dst->len = tmp.len;
832 dst->flags = NS_BUF_CHANGED;
838 src->buf_idx = dst->buf_idx;
839 src->flags = NS_BUF_CHANGED;
840
841 dst->buf_idx = tmp.buf_idx;
842 dst->len = tmp.len;
843 dst->flags = NS_BUF_CHANGED;
833 ND("out len %d buf %d from %d to %d",
834 dst->len, dst->buf_idx,
835 kring->nr_hwcur, k1->ring->cur);
836
844
837 kring->nr_hwcur = nm_next(kring->nr_hwcur, src_lim);
838 howmany--;
839 kring->nr_hwavail--;
840 k1->ring->cur = nm_next(k1->ring->cur, dst_lim);
841 k1->ring->avail--;
845 rdst->cur = nm_next(dst_cur, dst_lim);
842 }
846 }
843 kring->ring->cur = kring->nr_hwcur; // XXX
844 k1++; // XXX why?
847 /* if (sent) XXX txsync ? */
845 }
848 }
846out:
847 mtx_unlock(&kring->q_lock);
849 return sent;
848}
849
850
851/*
852 * netmap_txsync_to_host() passes packets up. We are called from a
853 * system call in user process context, and the only contention
854 * can be among multiple user threads erroneously calling
855 * this routine concurrently.
856 */
857void
858netmap_txsync_to_host(struct netmap_adapter *na)
859{
860 struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
861 struct netmap_ring *ring = kring->ring;
850}
851
852
853/*
854 * netmap_txsync_to_host() passes packets up. We are called from a
855 * system call in user process context, and the only contention
856 * can be among multiple user threads erroneously calling
857 * this routine concurrently.
858 */
859void
860netmap_txsync_to_host(struct netmap_adapter *na)
861{
862 struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
863 struct netmap_ring *ring = kring->ring;
862 u_int k, lim = kring->nkr_num_slots - 1;
864 u_int const lim = kring->nkr_num_slots - 1;
865 u_int const head = nm_txsync_prologue(kring);
863 struct mbq q;
864 int error;
865
866 error = nm_kr_tryget(kring);
867 if (error) {
868 if (error == NM_KR_BUSY)
869 D("ring %p busy (user error)", kring);
870 return;
871 }
866 struct mbq q;
867 int error;
868
869 error = nm_kr_tryget(kring);
870 if (error) {
871 if (error == NM_KR_BUSY)
872 D("ring %p busy (user error)", kring);
873 return;
874 }
872 k = ring->cur;
873 if (k > lim) {
875 if (head > lim) {
874 D("invalid ring index in stack TX kring %p", kring);
875 netmap_ring_reinit(kring);
876 nm_kr_put(kring);
877 return;
878 }
879
876 D("invalid ring index in stack TX kring %p", kring);
877 netmap_ring_reinit(kring);
878 nm_kr_put(kring);
879 return;
880 }
881
880 /* Take packets from hwcur to cur and pass them up.
882 /* Take packets from hwcur to head and pass them up.
883 * force head = cur since netmap_grab_packets() stops at head
881 * In case of no buffers we give up. At the end of the loop,
882 * the queue is drained in all cases.
883 */
884 mbq_init(&q);
884 * In case of no buffers we give up. At the end of the loop,
885 * the queue is drained in all cases.
886 */
887 mbq_init(&q);
885 netmap_grab_packets(kring, &q, 1);
886 kring->nr_hwcur = k;
887 kring->nr_hwavail = ring->avail = lim;
888 ring->cur = head;
889 netmap_grab_packets(kring, &q, 1 /* force */);
890 ND("have %d pkts in queue", mbq_len(&q));
891 kring->nr_hwcur = head;
892 kring->nr_hwtail = head + lim;
893 if (kring->nr_hwtail > lim)
894 kring->nr_hwtail -= lim + 1;
895 nm_txsync_finalize(kring);
888
889 nm_kr_put(kring);
890 netmap_send_up(na->ifp, &q);
891}
892
893
894/*
895 * rxsync backend for packets coming from the host stack.
896
897 nm_kr_put(kring);
898 netmap_send_up(na->ifp, &q);
899}
900
901
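/*
 * Illustrative aside (not code from netmap.c): the two host-ring backends
 * here (netmap_txsync_to_host above, netmap_rxsync_from_host below) are only
 * reached when the descriptor was bound to the host rings. A sketch of the
 * nr_ringid choices made before NIOCREGIF, with flags from <net/netmap.h>:
 */
#if 0	/* sketch only: 'req' is the struct nmreq passed to NIOCREGIF */
	req.nr_ringid = 0;			/* all hardware rings */
	req.nr_ringid = NETMAP_HW_RING | 2;	/* hardware ring 2 only */
	req.nr_ringid = NETMAP_SW_RING;		/* host stack rings only */
#endif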
902/*
903 * rxsync backend for packets coming from the host stack.
896 * They have been put in the queue by netmap_transmit() so we
897 * need to protect access to the kring using a lock.
904 * They have been put in kring->rx_queue by netmap_transmit().
905 * We protect access to the kring using kring->rx_queue.lock
898 *
899 * This routine also does the selrecord if called from the poll handler
900 * (we know because td != NULL).
901 *
902 * NOTE: on linux, selrecord() is defined as a macro and uses pwait
903 * as an additional hidden argument.
906 *
907 * This routine also does the selrecord if called from the poll handler
908 * (we know because td != NULL).
909 *
910 * NOTE: on linux, selrecord() is defined as a macro and uses pwait
911 * as an additional hidden argument.
912 * returns the number of packets delivered to tx queues in
913 * transparent mode, or a negative value if error
904 */
914 */
905static void
915int
906netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait)
907{
908 struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
909 struct netmap_ring *ring = kring->ring;
916netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait)
917{
918 struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
919 struct netmap_ring *ring = kring->ring;
910 u_int j, n, lim = kring->nkr_num_slots;
911 u_int k = ring->cur, resvd = ring->reserved;
920 u_int nm_i, n;
921 u_int const lim = kring->nkr_num_slots - 1;
922 u_int const head = nm_rxsync_prologue(kring);
923 int ret = 0;
924 struct mbq *q = &kring->rx_queue;
912
913 (void)pwait; /* disable unused warnings */
914
925
926 (void)pwait; /* disable unused warnings */
927
928 if (head > lim) {
929 netmap_ring_reinit(kring);
930 return EINVAL;
931 }
932
915 if (kring->nkr_stopped) /* check a first time without lock */
933 if (kring->nkr_stopped) /* check a first time without lock */
916 return;
934 return EBUSY;
917
935
918 mtx_lock(&kring->q_lock);
936 mtx_lock(&q->lock);
919
937
920 if (kring->nkr_stopped) /* check again with lock held */
938 if (kring->nkr_stopped) { /* check again with lock held */
939 ret = EBUSY;
921 goto unlock_out;
940 goto unlock_out;
922
923 if (k >= lim) {
924 netmap_ring_reinit(kring);
925 goto unlock_out;
926 }
941 }
927 /* new packets are already set in nr_hwavail */
928 /* skip past packets that userspace has released */
929 j = kring->nr_hwcur;
930 if (resvd > 0) {
931 if (resvd + ring->avail >= lim + 1) {
932 D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
933 ring->reserved = resvd = 0; // XXX panic...
942
943 /* First part: import newly received packets */
944 n = mbq_len(q);
945 if (n) { /* grab packets from the queue */
946 struct mbuf *m;
947 uint32_t stop_i;
948
949 nm_i = kring->nr_hwtail;
950 stop_i = nm_prev(nm_i, lim);
951 while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
952 int len = MBUF_LEN(m);
953 struct netmap_slot *slot = &ring->slot[nm_i];
954
955 m_copydata(m, 0, len, BDG_NMB(na, slot));
956 ND("nm %d len %d", nm_i, len);
957 if (netmap_verbose)
958 D("%s", nm_dump_buf(BDG_NMB(na, slot),len, 128, NULL));
959
960 slot->len = len;
961 slot->flags = kring->nkr_slot_flags;
962 nm_i = nm_next(nm_i, lim);
934 }
963 }
935 k = (k >= resvd) ? k - resvd : k + lim - resvd;
964 kring->nr_hwtail = nm_i;
936 }
965 }
937 if (j != k) {
938 n = k >= j ? k - j : k + lim - j;
939 kring->nr_hwavail -= n;
940 kring->nr_hwcur = k;
966
967 /*
968 * Second part: skip past packets that userspace has released.
969 */
970 nm_i = kring->nr_hwcur;
971 if (nm_i != head) { /* something was released */
972 if (netmap_fwd || kring->ring->flags & NR_FORWARD)
973 ret = netmap_sw_to_nic(na);
974 kring->nr_hwcur = head;
941 }
975 }
942 k = ring->avail = kring->nr_hwavail - resvd;
943 if (k == 0 && td)
976
977 nm_rxsync_finalize(kring);
978
979 /* access copies of cur,tail in the kring */
980 if (kring->rcur == kring->rtail && td) /* no bufs available */
944 selrecord(td, &kring->si);
981 selrecord(td, &kring->si);
945 if (k && (netmap_verbose & NM_VERB_HOST))
946 D("%d pkts from stack", k);
982
947unlock_out:
948
983unlock_out:
984
949 mtx_unlock(&kring->q_lock);
985 mtx_unlock(&q->lock);
986 return ret;
950}
951
952
953/* Get a netmap adapter for the port.
954 *
955 * If it is possible to satisfy the request, return 0
956 * with *na containing the netmap adapter found.
957 * Otherwise return an error code, with *na containing NULL.

--- 79 unchanged lines hidden (view full) ---

1037 *na = NA(ifp);
1038 gna = (struct netmap_generic_adapter*)NA(ifp);
1039 gna->prev = prev_na; /* save old na */
1040 if (prev_na != NULL) {
1041 ifunit_ref(ifp->if_xname);
1042 // XXX add a refcount ?
1043 netmap_adapter_get(prev_na);
1044 }
987}
988
989
990/* Get a netmap adapter for the port.
991 *
992 * If it is possible to satisfy the request, return 0
993 * with *na containing the netmap adapter found.
994 * Otherwise return an error code, with *na containing NULL.

--- 79 unchanged lines hidden (view full) ---

1074 *na = NA(ifp);
1075 gna = (struct netmap_generic_adapter*)NA(ifp);
1076 gna->prev = prev_na; /* save old na */
1077 if (prev_na != NULL) {
1078 ifunit_ref(ifp->if_xname);
1079 // XXX add a refcount ?
1080 netmap_adapter_get(prev_na);
1081 }
1045 D("Created generic NA %p (prev %p)", gna, gna->prev);
1082 ND("Created generic NA %p (prev %p)", gna, gna->prev);
1046
1047 return 0;
1048}
1049
1050
1051/*
1052 * MUST BE CALLED UNDER NMG_LOCK()
1053 *

--- 54 unchanged lines hidden (view full) ---

1108
1109 return error;
1110}
1111
1112
1113/*
1114 * validate parameters on entry for *_txsync()
1115 * Returns ring->cur if ok, or something >= kring->nkr_num_slots
1083
1084 return 0;
1085}
1086
1087
1088/*
1089 * MUST BE CALLED UNDER NMG_LOCK()
1090 *

--- 54 unchanged lines hidden (view full) ---

1145
1146 return error;
1147}
1148
1149
1150/*
1151 * validate parameters on entry for *_txsync()
1152 * Returns ring->cur if ok, or something >= kring->nkr_num_slots
1116 * in case of error. The extra argument is a pointer to
1117 * 'new_bufs'. XXX this may be deprecated at some point.
1153 * in case of error.
1118 *
1154 *
1119 * Below is a correct configuration on input. ring->cur
1120 * must be in the region covered by kring->hwavail,
1121 * and ring->avail and kring->avail should end at the same slot.
1155 * rhead, rcur and rtail=hwtail are stored from previous round.
1156 * hwcur is the next packet to send to the ring.
1122 *
1157 *
1123 * +-hwcur
1124 * |
1125 * v<--hwres-->|<-----hwavail---->
1126 * ------+------------------------------+-------- ring
1127 * |
1128 * |<---avail--->
1129 * +--cur
1158 * We want
1159 * hwcur <= *rhead <= head <= cur <= tail = *rtail <= hwtail
1130 *
1160 *
1161 * hwcur, rhead, rtail and hwtail are reliable
1131 */
1132u_int
1162 */
1163u_int
1133nm_txsync_prologue(struct netmap_kring *kring, u_int *new_slots)
1164nm_txsync_prologue(struct netmap_kring *kring)
1134{
1135 struct netmap_ring *ring = kring->ring;
1165{
1166 struct netmap_ring *ring = kring->ring;
1167 u_int head = ring->head; /* read only once */
1136 u_int cur = ring->cur; /* read only once */
1168 u_int cur = ring->cur; /* read only once */
1137 u_int avail = ring->avail; /* read only once */
1138 u_int n = kring->nkr_num_slots;
1169 u_int n = kring->nkr_num_slots;
1139 u_int kstart, kend, a;
1140
1170
1141#if 1 /* kernel sanity checks */
1142 if (kring->nr_hwcur >= n ||
1143 kring->nr_hwreserved >= n || kring->nr_hwavail >= n ||
1144 kring->nr_hwreserved + kring->nr_hwavail >= n)
1171 ND(5, "%s kcur %d ktail %d head %d cur %d tail %d",
1172 kring->name,
1173 kring->nr_hwcur, kring->nr_hwtail,
1174 ring->head, ring->cur, ring->tail);
1175#if 1 /* kernel sanity checks; but we can trust the kring. */
1176 if (kring->nr_hwcur >= n || kring->rhead >= n ||
1177 kring->rtail >= n || kring->nr_hwtail >= n)
1145 goto error;
1146#endif /* kernel sanity checks */
1178 goto error;
1179#endif /* kernel sanity checks */
1147 kstart = kring->nr_hwcur + kring->nr_hwreserved;
1148 if (kstart >= n)
1149 kstart -= n;
1150 kend = kstart + kring->nr_hwavail;
1151 /* user sanity checks. a is the expected avail */
1152 if (cur < kstart) {
1153 /* too low, but maybe wraparound */
1154 if (cur + n > kend)
1180 /*
1181 * user sanity checks. We only use 'cur',
1182 * A, B, ... are possible positions for cur:
1183 *
1184 * 0 A cur B tail C n-1
1185 * 0 D tail E cur F n-1
1186 *
1187 * B, F, D are valid. A, C, E are wrong
1188 */
1189 if (kring->rtail >= kring->rhead) {
1190 /* want rhead <= head <= rtail */
1191 if (head < kring->rhead || head > kring->rtail)
1155 goto error;
1192 goto error;
1156 *new_slots = cur + n - kstart;
1157 a = kend - cur - n;
1158 } else {
1159 if (cur > kend)
1193 /* and also head <= cur <= rtail */
1194 if (cur < head || cur > kring->rtail)
1160 goto error;
1195 goto error;
1161 *new_slots = cur - kstart;
1162 a = kend - cur;
1196 } else { /* here rtail < rhead */
1197 /* we need head outside rtail .. rhead */
1198 if (head > kring->rtail && head < kring->rhead)
1199 goto error;
1200
1201 /* two cases now: head <= rtail or head >= rhead */
1202 if (head <= kring->rtail) {
1203 /* want head <= cur <= rtail */
1204 if (cur < head || cur > kring->rtail)
1205 goto error;
1206 } else { /* head >= rhead */
1207 /* cur must be outside rtail..head */
1208 if (cur > kring->rtail && cur < head)
1209 goto error;
1210 }
1163 }
1211 }
1164 if (a != avail) {
1165 RD(5, "wrong but fixable avail have %d need %d",
1166 avail, a);
1167 ring->avail = avail = a;
1212 if (ring->tail != kring->rtail) {
1213 RD(5, "tail overwritten was %d need %d",
1214 ring->tail, kring->rtail);
1215 ring->tail = kring->rtail;
1168 }
1216 }
1169 return cur;
1217 kring->rhead = head;
1218 kring->rcur = cur;
1219 return head;
1170
1171error:
1220
1221error:
1172 RD(5, "kring error: hwcur %d hwres %d hwavail %d cur %d av %d",
1222 RD(5, "%s kring error: hwcur %d rcur %d hwtail %d cur %d tail %d",
1223 kring->name,
1173 kring->nr_hwcur,
1224 kring->nr_hwcur,
1174 kring->nr_hwreserved, kring->nr_hwavail,
1175 cur, avail);
1225 kring->rcur, kring->nr_hwtail,
1226 cur, ring->tail);
1176 return n;
1177}
1178
1179
1180/*
1181 * validate parameters on entry for *_rxsync()
1227 return n;
1228}
1229
1230
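/*
 * Illustrative aside (not code from netmap.c): under the head/cur/tail
 * invariant checked above, a sender owns the slots between head and tail.
 * A minimal user-side helper computing that count (the name
 * tx_slots_available is ours; <net/netmap_user.h> ships similar helpers):
 */
static inline uint32_t
tx_slots_available(const struct netmap_ring *ring)
{
	int space = (int)ring->tail - (int)ring->head;	/* user owns [head, tail) */

	if (space < 0)			/* the free region wraps past slot 0 */
		space += ring->num_slots;
	return ((uint32_t)space);
}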
1231/*
1232 * validate parameters on entry for *_rxsync()
1182 * Returns ring->cur - ring->reserved if ok,
1183 * or something >= kring->nkr_num_slots
1184 * in case of error. The extra argument is a pointer to
1185 * 'resvd'. XXX this may be deprecated at some point.
1233 * Returns ring->head if ok, kring->nkr_num_slots on error.
1186 *
1234 *
1187 * Below is a correct configuration on input. ring->cur and
1188 * ring->reserved must be in the region covered by kring->hwavail,
1189 * and ring->avail and kring->avail should end at the same slot.
1235 * For a valid configuration,
1236 * hwcur <= head <= cur <= tail <= hwtail
1190 *
1237 *
1191 * +-hwcur
1192 * |
1193 * v<-------hwavail---------->
1194 * ---------+--------------------------+-------- ring
1195 * |<--res-->|
1196 * |<---avail--->
1197 * +--cur
1238 * We only consider head and cur.
1239 * hwcur and hwtail are reliable.
1198 *
1199 */
1200u_int
1240 *
1241 */
1242u_int
1201nm_rxsync_prologue(struct netmap_kring *kring, u_int *resvd)
1243nm_rxsync_prologue(struct netmap_kring *kring)
1202{
1203 struct netmap_ring *ring = kring->ring;
1244{
1245 struct netmap_ring *ring = kring->ring;
1204 u_int cur = ring->cur; /* read only once */
1205 u_int avail = ring->avail; /* read only once */
1206 u_int res = ring->reserved; /* read only once */
1207 u_int n = kring->nkr_num_slots;
1208 u_int kend = kring->nr_hwcur + kring->nr_hwavail;
1209 u_int a;
1246 uint32_t const n = kring->nkr_num_slots;
1247 uint32_t head, cur;
1210
1248
1249 ND("%s kc %d kt %d h %d c %d t %d",
1250 kring->name,
1251 kring->nr_hwcur, kring->nr_hwtail,
1252 ring->head, ring->cur, ring->tail);
1253 /*
1254 * Before storing the new values, we should check they do not
1255 * move backwards. However:
1256 * - head is not an issue because the previous value is hwcur;
1257 * - cur could in principle go back, however it does not matter
1258 * because we are processing a brand new rxsync()
1259 */
1260 cur = kring->rcur = ring->cur; /* read only once */
1261 head = kring->rhead = ring->head; /* read only once */
1211#if 1 /* kernel sanity checks */
1262#if 1 /* kernel sanity checks */
1212 if (kring->nr_hwcur >= n || kring->nr_hwavail >= n)
1263 if (kring->nr_hwcur >= n || kring->nr_hwtail >= n)
1213 goto error;
1214#endif /* kernel sanity checks */
1215 /* user sanity checks */
1264 goto error;
1265#endif /* kernel sanity checks */
1266 /* user sanity checks */
1216 if (res >= n)
1217 goto error;
1218 /* check that cur is valid, a is the expected value of avail */
1219 if (cur < kring->nr_hwcur) {
1220 /* too low, but maybe wraparound */
1221 if (cur + n > kend)
1267 if (kring->nr_hwtail >= kring->nr_hwcur) {
1268 /* want hwcur <= rhead <= hwtail */
1269 if (head < kring->nr_hwcur || head > kring->nr_hwtail)
1222 goto error;
1270 goto error;
1223 a = kend - (cur + n);
1224 } else {
1225 if (cur > kend)
1271 /* and also rhead <= rcur <= hwtail */
1272 if (cur < head || cur > kring->nr_hwtail)
1226 goto error;
1273 goto error;
1227 a = kend - cur;
1228 }
1229 if (a != avail) {
1230 RD(5, "wrong but fixable avail have %d need %d",
1231 avail, a);
1232 ring->avail = avail = a;
1233 }
1234 if (res != 0) {
1235 /* then repeat the check for cur + res */
1236 cur = (cur >= res) ? cur - res : n + cur - res;
1237 if (cur < kring->nr_hwcur) {
1238 /* too low, but maybe wraparound */
1239 if (cur + n > kend)
1240 goto error;
1241 } else if (cur > kend) {
1274 } else {
1275 /* we need rhead outside hwtail..hwcur */
1276 if (head < kring->nr_hwcur && head > kring->nr_hwtail)
1242 goto error;
1277 goto error;
1278 /* two cases now: head <= hwtail or head >= hwcur */
1279 if (head <= kring->nr_hwtail) {
1280 /* want head <= cur <= hwtail */
1281 if (cur < head || cur > kring->nr_hwtail)
1282 goto error;
1283 } else {
1284 /* cur must be outside hwtail..head */
1285 if (cur < head && cur > kring->nr_hwtail)
1286 goto error;
1243 }
1244 }
1287 }
1288 }
1245 *resvd = res;
1246 return cur;
1289 if (ring->tail != kring->rtail) {
1290 RD(5, "%s tail overwritten was %d need %d",
1291 kring->name,
1292 ring->tail, kring->rtail);
1293 ring->tail = kring->rtail;
1294 }
1295 return head;
1247
1248error:
1296
1297error:
1249 RD(5, "kring error: hwcur %d hwres %d hwavail %d cur %d av %d res %d",
1298 RD(5, "kring error: hwcur %d rcur %d hwtail %d head %d cur %d tail %d",
1250 kring->nr_hwcur,
1299 kring->nr_hwcur,
1251 kring->nr_hwreserved, kring->nr_hwavail,
1252 ring->cur, avail, res);
1300 kring->rcur, kring->nr_hwtail,
1301 kring->rhead, kring->rcur, ring->tail);
1253 return n;
1254}
1255
1302 return n;
1303}
1304
1305
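/*
 * Illustrative aside (not code from netmap.c): the head/cur values validated
 * above are produced by a receive loop of roughly this shape. A minimal
 * sketch assuming <sys/ioctl.h>, <net/netmap.h> and <net/netmap_user.h>;
 * process_frame() is a placeholder for the application's consumer:
 */
extern void process_frame(const char *buf, unsigned int len);	/* placeholder */

static void
rx_drain_ring(int fd, struct netmap_ring *rxr)
{
	while (rxr->cur != rxr->tail) {		/* unread frames sit in [cur, tail) */
		struct netmap_slot *slot = &rxr->slot[rxr->cur];

		process_frame(NETMAP_BUF(rxr, slot->buf_idx), slot->len);
		rxr->cur = (rxr->cur + 1 == rxr->num_slots) ? 0 : rxr->cur + 1;
	}
	rxr->head = rxr->cur;			/* return the slots to the kernel */
	(void)ioctl(fd, NIOCRXSYNC, NULL);	/* or let the next poll() do the sync */
}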
1256/*
1257 * Error routine called when txsync/rxsync detects an error.
1306/*
1307 * Error routine called when txsync/rxsync detects an error.
1258 * Can't do much more than resetting cur = hwcur, avail = hwavail.
1308 * Can't do much more than resetting head =cur = hwcur, tail = hwtail
1259 * Return 1 on reinit.
1260 *
1261 * This routine is only called by the upper half of the kernel.
1262 * It only reads hwcur (which is changed only by the upper half, too)
1309 * Return 1 on reinit.
1310 *
1311 * This routine is only called by the upper half of the kernel.
1312 * It only reads hwcur (which is changed only by the upper half, too)
1263 * and hwavail (which may be changed by the lower half, but only on
1313 * and hwtail (which may be changed by the lower half, but only on
1264 * a tx ring and only to increase it, so any error will be recovered
1265 * on the next call). For the above, we don't strictly need to call
1266 * it under lock.
1267 */
1268int
1269netmap_ring_reinit(struct netmap_kring *kring)
1270{
1271 struct netmap_ring *ring = kring->ring;
1272 u_int i, lim = kring->nkr_num_slots - 1;
1273 int errors = 0;
1274
1275 // XXX KASSERT nm_kr_tryget
1276 RD(10, "called for %s", NM_IFPNAME(kring->na->ifp));
1314 * a tx ring and only to increase it, so any error will be recovered
1315 * on the next call). For the above, we don't strictly need to call
1316 * it under lock.
1317 */
1318int
1319netmap_ring_reinit(struct netmap_kring *kring)
1320{
1321 struct netmap_ring *ring = kring->ring;
1322 u_int i, lim = kring->nkr_num_slots - 1;
1323 int errors = 0;
1324
1325 // XXX KASSERT nm_kr_tryget
1326 RD(10, "called for %s", NM_IFPNAME(kring->na->ifp));
1327 // XXX probably wrong to trust userspace
1328 kring->rhead = ring->head;
1329 kring->rcur = ring->cur;
1330 kring->rtail = ring->tail;
1331
1277 if (ring->cur > lim)
1278 errors++;
1332 if (ring->cur > lim)
1333 errors++;
1334 if (ring->head > lim)
1335 errors++;
1336 if (ring->tail > lim)
1337 errors++;
1279 for (i = 0; i <= lim; i++) {
1280 u_int idx = ring->slot[i].buf_idx;
1281 u_int len = ring->slot[i].len;
1282 if (idx < 2 || idx >= netmap_total_buffers) {
1338 for (i = 0; i <= lim; i++) {
1339 u_int idx = ring->slot[i].buf_idx;
1340 u_int len = ring->slot[i].len;
1341 if (idx < 2 || idx >= netmap_total_buffers) {
1283 if (!errors++)
1284 D("bad buffer at slot %d idx %d len %d ", i, idx, len);
1342 RD(5, "bad index at slot %d idx %d len %d ", i, idx, len);
1285 ring->slot[i].buf_idx = 0;
1286 ring->slot[i].len = 0;
1287 } else if (len > NETMAP_BDG_BUF_SIZE(kring->na->nm_mem)) {
1288 ring->slot[i].len = 0;
1343 ring->slot[i].buf_idx = 0;
1344 ring->slot[i].len = 0;
1345 } else if (len > NETMAP_BDG_BUF_SIZE(kring->na->nm_mem)) {
1346 ring->slot[i].len = 0;
1289 if (!errors++)
1290 D("bad len %d at slot %d idx %d",
1291 len, i, idx);
1347 RD(5, "bad len at slot %d idx %d len %d", i, idx, len);
1292 }
1293 }
1294 if (errors) {
1348 }
1349 }
1350 if (errors) {
1295 int pos = kring - kring->na->tx_rings;
1296 int n = kring->na->num_tx_rings + 1;
1297
1298 RD(10, "total %d errors", errors);
1351 RD(10, "total %d errors", errors);
1299 errors++;
1300 RD(10, "%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
1301 NM_IFPNAME(kring->na->ifp),
1302 pos < n ? "TX" : "RX", pos < n ? pos : pos - n,
1352 RD(10, "%s reinit, cur %d -> %d tail %d -> %d",
1353 kring->name,
1303 ring->cur, kring->nr_hwcur,
1354 ring->cur, kring->nr_hwcur,
1304 ring->avail, kring->nr_hwavail);
1305 ring->cur = kring->nr_hwcur;
1306 ring->avail = kring->nr_hwavail;
1355 ring->tail, kring->nr_hwtail);
1356 ring->head = kring->rhead = kring->nr_hwcur;
1357 ring->cur = kring->rcur = kring->nr_hwcur;
1358 ring->tail = kring->rtail = kring->nr_hwtail;
1307 }
1308 return (errors ? 1 : 0);
1309}
1310
1311
1312/*
1313 * Set the ring ID. For devices with a single queue, a request
1314 * for all rings is the same as a single ring.

--- 116 unchanged lines hidden (view full) ---

1431
1432/*
1433 * ioctl(2) support for the "netmap" device.
1434 *
1435 * Following a list of accepted commands:
1436 * - NIOCGINFO
1437 * - SIOCGIFADDR just for convenience
1438 * - NIOCREGIF
1359 }
1360 return (errors ? 1 : 0);
1361}
1362
1363
1364/*
1365 * Set the ring ID. For devices with a single queue, a request
1366 * for all rings is the same as a single ring.

--- 116 unchanged lines hidden (view full) ---

1483
1484/*
1485 * ioctl(2) support for the "netmap" device.
1486 *
1487 * Following a list of accepted commands:
1488 * - NIOCGINFO
1489 * - SIOCGIFADDR just for convenience
1490 * - NIOCREGIF
1439 * - NIOCUNREGIF
1440 * - NIOCTXSYNC
1441 * - NIOCRXSYNC
1442 *
1443 * Return 0 on success, errno otherwise.
1444 */
1445int
1446netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
1447 int fflag, struct thread *td)

--- 19 unchanged lines hidden (view full) ---

1467 ({ ((struct file *)td)->private_data = p; (p ? 0 : EINVAL); })
1468
1469
1470#define devfs_clear_cdevpriv() do { \
1471 netmap_dtor(priv); ((struct file *)td)->private_data = 0; \
1472 } while (0)
1473#endif /* linux */
1474
1491 * - NIOCTXSYNC
1492 * - NIOCRXSYNC
1493 *
1494 * Return 0 on success, errno otherwise.
1495 */
1496int
1497netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
1498 int fflag, struct thread *td)

--- 19 unchanged lines hidden (view full) ---

1518 ({ ((struct file *)td)->private_data = p; (p ? 0 : EINVAL); })
1519
1520
1521#define devfs_clear_cdevpriv() do { \
1522 netmap_dtor(priv); ((struct file *)td)->private_data = 0; \
1523 } while (0)
1524#endif /* linux */
1525
1526 if (cmd == NIOCGINFO || cmd == NIOCREGIF) {
1527 /* truncate name */
1528 nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0';
1529 if (nmr->nr_version != NETMAP_API) {
1530 D("API mismatch for %s got %d need %d",
1531 nmr->nr_name,
1532 nmr->nr_version, NETMAP_API);
1533 nmr->nr_version = NETMAP_API;
1534 return EINVAL;
1535 }
1536 }
1475 CURVNET_SET(TD_TO_VNET(td));
1476
1477 error = devfs_get_cdevpriv((void **)&priv);
1478 if (error) {
1479 CURVNET_RESTORE();
1480 /* XXX ENOENT should be impossible, since the priv
1481 * is now created in the open */
1482 return (error == ENOENT ? ENXIO : error);
1483 }
1484
1537 CURVNET_SET(TD_TO_VNET(td));
1538
1539 error = devfs_get_cdevpriv((void **)&priv);
1540 if (error) {
1541 CURVNET_RESTORE();
1542 /* XXX ENOENT should be impossible, since the priv
1543 * is now created in the open */
1544 return (error == ENOENT ? ENXIO : error);
1545 }
1546
1485 nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0'; /* truncate name */
1486 switch (cmd) {
1487 case NIOCGINFO: /* return capabilities etc */
1547 switch (cmd) {
1548 case NIOCGINFO: /* return capabilities etc */
1488 if (nmr->nr_version != NETMAP_API) {
1489 D("API mismatch got %d have %d",
1490 nmr->nr_version, NETMAP_API);
1491 nmr->nr_version = NETMAP_API;
1492 error = EINVAL;
1493 break;
1494 }
1495 if (nmr->nr_cmd == NETMAP_BDG_LIST) {
1496 error = netmap_bdg_ctl(nmr, NULL);
1497 break;
1498 }
1499
1500 NMG_LOCK();
1501 do {
1502 /* memsize is always valid */

--- 23 unchanged lines hidden (view full) ---

1526 if (memflags & NETMAP_MEM_PRIVATE)
1527 nmr->nr_ringid |= NETMAP_PRIV_MEM;
1528 netmap_adapter_put(na);
1529 } while (0);
1530 NMG_UNLOCK();
1531 break;
1532
1533 case NIOCREGIF:
1549 if (nmr->nr_cmd == NETMAP_BDG_LIST) {
1550 error = netmap_bdg_ctl(nmr, NULL);
1551 break;
1552 }
1553
1554 NMG_LOCK();
1555 do {
1556 /* memsize is always valid */

--- 23 unchanged lines hidden (view full) ---

1580 if (memflags & NETMAP_MEM_PRIVATE)
1581 nmr->nr_ringid |= NETMAP_PRIV_MEM;
1582 netmap_adapter_put(na);
1583 } while (0);
1584 NMG_UNLOCK();
1585 break;
1586
1587 case NIOCREGIF:
1534 if (nmr->nr_version != NETMAP_API) {
1535 nmr->nr_version = NETMAP_API;
1536 error = EINVAL;
1537 break;
1538 }
1539 /* possibly attach/detach NIC and VALE switch */
1540 i = nmr->nr_cmd;
1541 if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH
1542 || i == NETMAP_BDG_OFFSET) {
1543 error = netmap_bdg_ctl(nmr, NULL);
1544 break;
1545 } else if (i != 0) {
1546 D("nr_cmd must be 0 not %d", i);

--- 41 unchanged lines hidden (view full) ---

1588 nmr->nr_ringid |= NETMAP_PRIV_MEM;
1589 *(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
1590 }
1591 nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
1592 } while (0);
1593 NMG_UNLOCK();
1594 break;
1595
1588 /* possibly attach/detach NIC and VALE switch */
1589 i = nmr->nr_cmd;
1590 if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH
1591 || i == NETMAP_BDG_OFFSET) {
1592 error = netmap_bdg_ctl(nmr, NULL);
1593 break;
1594 } else if (i != 0) {
1595 D("nr_cmd must be 0 not %d", i);

--- 41 unchanged lines hidden (view full) ---

1637 nmr->nr_ringid |= NETMAP_PRIV_MEM;
1638 *(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
1639 }
1640 nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
1641 } while (0);
1642 NMG_UNLOCK();
1643 break;
1644
1596 case NIOCUNREGIF:
1597 // XXX we have no data here ?
1598 D("deprecated, data is %p", nmr);
1599 error = EINVAL;
1600 break;
1601
1602 case NIOCTXSYNC:
1603 case NIOCRXSYNC:
1604 nifp = priv->np_nifp;
1605
1606 if (nifp == NULL) {
1607 error = ENXIO;
1608 break;
1609 }

--- 34 unchanged lines hidden (view full) ---

1644 error = EBUSY;
1645 goto out;
1646 }
1647 if (cmd == NIOCTXSYNC) {
1648 if (netmap_verbose & NM_VERB_TXSYNC)
1649 D("pre txsync ring %d cur %d hwcur %d",
1650 i, kring->ring->cur,
1651 kring->nr_hwcur);
1645 case NIOCTXSYNC:
1646 case NIOCRXSYNC:
1647 nifp = priv->np_nifp;
1648
1649 if (nifp == NULL) {
1650 error = ENXIO;
1651 break;
1652 }

--- 34 unchanged lines hidden (view full) ---

1687 error = EBUSY;
1688 goto out;
1689 }
1690 if (cmd == NIOCTXSYNC) {
1691 if (netmap_verbose & NM_VERB_TXSYNC)
1692 D("pre txsync ring %d cur %d hwcur %d",
1693 i, kring->ring->cur,
1694 kring->nr_hwcur);
1652 na->nm_txsync(na, i, NAF_FORCE_RECLAIM);
1695 if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
1696 netmap_ring_reinit(kring);
1697 } else {
1698 na->nm_txsync(na, i, NAF_FORCE_RECLAIM);
1699 }
1653 if (netmap_verbose & NM_VERB_TXSYNC)
1654 D("post txsync ring %d cur %d hwcur %d",
1655 i, kring->ring->cur,
1656 kring->nr_hwcur);
1657 } else {
1658 na->nm_rxsync(na, i, NAF_FORCE_READ);
1659 microtime(&na->rx_rings[i].ring->ts);
1660 }

--- 60 unchanged lines hidden (view full) ---

1721int
1722netmap_poll(struct cdev *dev, int events, struct thread *td)
1723{
1724 struct netmap_priv_d *priv = NULL;
1725 struct netmap_adapter *na;
1726 struct ifnet *ifp;
1727 struct netmap_kring *kring;
1728 u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0;
1700 if (netmap_verbose & NM_VERB_TXSYNC)
1701 D("post txsync ring %d cur %d hwcur %d",
1702 i, kring->ring->cur,
1703 kring->nr_hwcur);
1704 } else {
1705 na->nm_rxsync(na, i, NAF_FORCE_READ);
1706 microtime(&na->rx_rings[i].ring->ts);
1707 }

--- 60 unchanged lines hidden (view full) ---

1768int
1769netmap_poll(struct cdev *dev, int events, struct thread *td)
1770{
1771 struct netmap_priv_d *priv = NULL;
1772 struct netmap_adapter *na;
1773 struct ifnet *ifp;
1774 struct netmap_kring *kring;
1775 u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0;
1729 u_int lim_tx, lim_rx, host_forwarded = 0;
1730 struct mbq q;
1776 u_int lim_tx, lim_rx;
1777 struct mbq q; /* packets from hw queues to host stack */
1731 void *pwait = dev; /* linux compatibility */
1732
1733 /*
1734 * In order to avoid nested locks, we need to "double check"
1735 * txsync and rxsync if we decide to do a selrecord().
1736 * retry_tx (and retry_rx, later) prevent looping forever.
1737 */
1778 void *pwait = dev; /* linux compatibility */
1779
1780 /*
1781 * In order to avoid nested locks, we need to "double check"
1782 * txsync and rxsync if we decide to do a selrecord().
1783 * retry_tx (and retry_rx, later) prevent looping forever.
1784 */
1738 int retry_tx = 1;
1785 int retry_tx = 1, retry_rx = 1;
1739
1740 (void)pwait;
1741 mbq_init(&q);
1742
1743 if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL)
1744 return POLLERR;
1745
1746 if (priv->np_nifp == NULL) {

--- 17 unchanged lines hidden (view full) ---

1764 D("device %s events 0x%x", NM_IFPNAME(ifp), events);
1765 want_tx = events & (POLLOUT | POLLWRNORM);
1766 want_rx = events & (POLLIN | POLLRDNORM);
1767
1768 lim_tx = na->num_tx_rings;
1769 lim_rx = na->num_rx_rings;
1770
1771 if (priv->np_qfirst == NETMAP_SW_RING) {
1786
1787 (void)pwait;
1788 mbq_init(&q);
1789
1790 if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL)
1791 return POLLERR;
1792
1793 if (priv->np_nifp == NULL) {

--- 17 unchanged lines hidden (view full) ---

1811 D("device %s events 0x%x", NM_IFPNAME(ifp), events);
1812 want_tx = events & (POLLOUT | POLLWRNORM);
1813 want_rx = events & (POLLIN | POLLRDNORM);
1814
1815 lim_tx = na->num_tx_rings;
1816 lim_rx = na->num_rx_rings;
1817
1818 if (priv->np_qfirst == NETMAP_SW_RING) {
1819 // XXX locking ?
1772 /* handle the host stack ring */
1773 if (priv->np_txpoll || want_tx) {
1774 /* push any packets up, then we are always ready */
1775 netmap_txsync_to_host(na);
1776 revents |= want_tx;
1777 }
1778 if (want_rx) {
1779 kring = &na->rx_rings[lim_rx];
1820 /* handle the host stack ring */
1821 if (priv->np_txpoll || want_tx) {
1822 /* push any packets up, then we are always ready */
1823 netmap_txsync_to_host(na);
1824 revents |= want_tx;
1825 }
1826 if (want_rx) {
1827 kring = &na->rx_rings[lim_rx];
1780 if (kring->ring->avail == 0)
1828 /* XXX replace with rxprologue etc. */
1829 if (nm_ring_empty(kring->ring))
1781 netmap_rxsync_from_host(na, td, dev);
1830 netmap_rxsync_from_host(na, td, dev);
1782 if (kring->ring->avail > 0) {
1831 if (!nm_ring_empty(kring->ring))
1783 revents |= want_rx;
1832 revents |= want_rx;
1784 }
1785 }
1786 return (revents);
1787 }
1788
1833 }
1834 return (revents);
1835 }
1836
1789 /*
1790 * If we are in transparent mode, check also the host rx ring
1791 * XXX Transparent mode at the moment requires to bind all
1792 * rings to a single file descriptor.
1793 */
1794 kring = &na->rx_rings[lim_rx];
1795 if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
1796 && want_rx
1797 && (netmap_fwd || kring->ring->flags & NR_FORWARD) ) {
1798 if (kring->ring->avail == 0)
1799 netmap_rxsync_from_host(na, td, dev);
1800 if (kring->ring->avail > 0)
1801 revents |= want_rx;
1802 }
1803
1804 /*
1805 * check_all_{tx|rx} are set if the card has more than one queue AND
1806 * the file descriptor is bound to all of them. If so, we sleep on
1807 * the "global" selinfo, otherwise we sleep on individual selinfo
1808 * (FreeBSD only allows two selinfo's per file descriptor).
1809 * The interrupt routine in the driver wake one or the other
1810 * (or both) depending on which clients are active.

--- 9 unchanged lines hidden (view full) ---

1820 if (priv->np_qlast != NETMAP_HW_RING) {
1821 lim_tx = lim_rx = priv->np_qlast;
1822 }
1823
1824 /*
1825 * We start with a lock free round which is cheap if we have
1826 * slots available. If this fails, then lock and call the sync
1827 * routines.
1837
1838 /*
1839 * check_all_{tx|rx} are set if the card has more than one queue AND
1840 * the file descriptor is bound to all of them. If so, we sleep on
1841 * the "global" selinfo, otherwise we sleep on individual selinfo
1842 * (FreeBSD only allows two selinfo's per file descriptor).
1843 * The interrupt routine in the driver wake one or the other
1844 * (or both) depending on which clients are active.

--- 9 unchanged lines hidden (view full) ---

1854 if (priv->np_qlast != NETMAP_HW_RING) {
1855 lim_tx = lim_rx = priv->np_qlast;
1856 }
1857
1858 /*
1859 * We start with a lock free round which is cheap if we have
1860 * slots available. If this fails, then lock and call the sync
1861 * routines.
1828 * XXX rather than ring->avail >0 should check that
1829 * ring->cur has not reached hwcur+hwavail
1830 */
1831 for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) {
1832 kring = &na->rx_rings[i];
1862 */
1863 for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) {
1864 kring = &na->rx_rings[i];
1833 if (kring->ring->avail > 0) {
1865 /* XXX compare ring->cur and kring->tail */
1866 if (!nm_ring_empty(kring->ring)) {
1834 revents |= want_rx;
1835 want_rx = 0; /* also breaks the loop */
1836 }
1837 }
1838 for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) {
1839 kring = &na->tx_rings[i];
1867 revents |= want_rx;
1868 want_rx = 0; /* also breaks the loop */
1869 }
1870 }
1871 for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) {
1872 kring = &na->tx_rings[i];
1840 if (kring->ring->avail > 0) {
1873 /* XXX compare ring->cur and kring->tail */
1874 if (!nm_ring_empty(kring->ring)) {
1841 revents |= want_tx;
1842 want_tx = 0; /* also breaks the loop */
1843 }
1844 }
1845
1846 /*
1875 revents |= want_tx;
1876 want_tx = 0; /* also breaks the loop */
1877 }
1878 }
1879
1880 /*
1847 * If we to push packets out (priv->np_txpoll) or want_tx is
1848 * still set, we do need to run the txsync calls (on all rings,
1849 * to avoid that the tx rings stall).
1881 * If we want to push packets out (priv->np_txpoll) or
1882 * want_tx is still set, we must issue txsync calls
1883 * (on all rings, to avoid that the tx rings stall).
1850 * XXX should also check cur != hwcur on the tx rings.
1851 * Fortunately, normal tx mode has np_txpoll set.
1852 */
1853 if (priv->np_txpoll || want_tx) {
1884 * XXX should also check cur != hwcur on the tx rings.
1885 * Fortunately, normal tx mode has np_txpoll set.
1886 */
1887 if (priv->np_txpoll || want_tx) {
1854 /* If we really want to be woken up (want_tx),
1855 * do a selrecord, either on the global or on
1856 * the private structure. Then issue the txsync
1857 * so there is no race in the selrecord/selwait
1888 /*
1889 * The first round checks if anyone is ready, if not
1890 * do a selrecord and another round to handle races.
1891 * want_tx goes to 0 if any space is found, and is
1892 * used to skip rings with no pending transmissions.
1858 */
1859flush_tx:
1860 for (i = priv->np_qfirst; i < lim_tx; i++) {
1893 */
1894flush_tx:
1895 for (i = priv->np_qfirst; i < lim_tx; i++) {
1896 int found = 0;
1897
1861 kring = &na->tx_rings[i];
1898 kring = &na->tx_rings[i];
1862 /*
1863 * Skip this ring if want_tx == 0
1864 * (we have already done a successful sync on
1865 * a previous ring) AND kring->cur == kring->hwcur
1866 * (there are no pending transmissions for this ring).
1867 */
1868 if (!want_tx && kring->ring->cur == kring->nr_hwcur)
1869 continue;
1899 if (!want_tx && kring->ring->cur == kring->nr_hwcur)
1900 continue;
1870 /* make sure only one user thread is doing this */
1901 /* only one thread does txsync */
1871 if (nm_kr_tryget(kring)) {
1902 if (nm_kr_tryget(kring)) {
1872 ND("ring %p busy is %d",
1873 kring, (int)kring->nr_busy);
1903 D("%p lost race on txring %d, ok", priv, i);
1904 continue;
1905 }
1906 if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
1907 netmap_ring_reinit(kring);
1874 revents |= POLLERR;
1908 revents |= POLLERR;
1875 goto out;
1909 } else {
1910 if (na->nm_txsync(na, i, 0))
1911 revents |= POLLERR;
1876 }
1877
1912 }
1913
1878 if (netmap_verbose & NM_VERB_TXSYNC)
1879 D("send %d on %s %d",
1880 kring->ring->cur, NM_IFPNAME(ifp), i);
1881 if (na->nm_txsync(na, i, 0))
1882 revents |= POLLERR;
1883
1884 /* Check avail and call selrecord only if
1885 * called with POLLOUT and run out of bufs.
1886 * XXX Note, we cannot trust much ring->avail
1887 * as it is exposed to userspace (even though
1888 * just updated by txsync). We should really
1889 * check kring->nr_hwavail or better have
1890 * txsync set a flag telling if we need
1891 * to do a selrecord().
1914 /*
1915 * If we found new slots, notify potential
1916 * listeners on the same ring.
1917 * Since we just did a txsync, look at the copies
1918 * of cur,tail in the kring.
1892 */
1919 */
1893 if (want_tx) {
1894 if (kring->ring->avail > 0) {
1895 /* stop at the first ring. We don't risk
1896 * starvation.
1897 */
1898 revents |= want_tx;
1899 want_tx = 0;
1900 }
1901 }
1920 found = kring->rcur != kring->rtail;
1902 nm_kr_put(kring);
1921 nm_kr_put(kring);
1922 if (found) { /* notify other listeners */
1923 revents |= want_tx;
1924 want_tx = 0;
1925 na->nm_notify(na, i, NR_TX, NAF_GLOBAL_NOTIFY);
1926 }
1903 }
1904 if (want_tx && retry_tx) {
1905 selrecord(td, check_all_tx ?
1906 &na->tx_si : &na->tx_rings[priv->np_qfirst].si);
1907 retry_tx = 0;
1908 goto flush_tx;
1909 }
1910 }
1911
1912 /*
1927 }
1928 if (want_tx && retry_tx) {
1929 selrecord(td, check_all_tx ?
1930 &na->tx_si : &na->tx_rings[priv->np_qfirst].si);
1931 retry_tx = 0;
1932 goto flush_tx;
1933 }
1934 }
1935
1936 /*
1913 * now if want_rx is still set we need to lock and rxsync.
1937 * If want_rx is still set scan receive rings.
1914 * Do it on all rings because otherwise we starve.
1915 */
1916 if (want_rx) {
1938 * Do it on all rings because otherwise we starve.
1939 */
1940 if (want_rx) {
1917 int retry_rx = 1;
1941 int send_down = 0; /* transparent mode */
1942 /* two rounds here for race avoidance */
1918do_retry_rx:
1919 for (i = priv->np_qfirst; i < lim_rx; i++) {
1943do_retry_rx:
1944 for (i = priv->np_qfirst; i < lim_rx; i++) {
1945 int found = 0;
1946
1920 kring = &na->rx_rings[i];
1921
1922 if (nm_kr_tryget(kring)) {
1947 kring = &na->rx_rings[i];
1948
1949 if (nm_kr_tryget(kring)) {
1923 revents |= POLLERR;
1924 goto out;
1950 D("%p lost race on rxring %d, ok", priv, i);
1951 continue;
1925 }
1926
1952 }
1953
1927 /* XXX NR_FORWARD should only be read on
1954 /*
1955 * transparent mode support: collect packets
1956 * from the rxring(s).
1957 * XXX NR_FORWARD should only be read on
1928 * physical or NIC ports
1929 */
1930 if (netmap_fwd ||kring->ring->flags & NR_FORWARD) {
1931 ND(10, "forwarding some buffers up %d to %d",
1932 kring->nr_hwcur, kring->ring->cur);
1933 netmap_grab_packets(kring, &q, netmap_fwd);
1934 }
1935
1936 if (na->nm_rxsync(na, i, 0))
1937 revents |= POLLERR;
1938 if (netmap_no_timestamp == 0 ||
1939 kring->ring->flags & NR_TIMESTAMP) {
1940 microtime(&kring->ring->ts);
1941 }
1958 * physical or NIC ports
1959 */
1960 if (netmap_fwd || kring->ring->flags & NR_FORWARD) {
1961 ND(10, "forwarding some buffers up %d to %d",
1962 kring->nr_hwcur, kring->ring->cur);
1963 netmap_grab_packets(kring, &q, netmap_fwd);
1964 }
1965
1966 if (na->nm_rxsync(na, i, 0))
1967 revents |= POLLERR;
1968 if (netmap_no_timestamp == 0 ||
1969 kring->ring->flags & NR_TIMESTAMP) {
1970 microtime(&kring->ring->ts);
1971 }
1942
1943 if (kring->ring->avail > 0) {
1972 /* after an rxsync we can use kring->rcur, rtail */
1973 found = kring->rcur != kring->rtail;
1974 nm_kr_put(kring);
1975 if (found) {
1944 revents |= want_rx;
1945 retry_rx = 0;
1976 revents |= want_rx;
1977 retry_rx = 0;
1978 na->nm_notify(na, i, NR_RX, NAF_GLOBAL_NOTIFY);
1946 }
1979 }
1947 nm_kr_put(kring);
1948 }
1980 }
1949 if (retry_rx) {
1950 retry_rx = 0;
1981
1982 /* transparent mode XXX only during first pass ? */
1983 kring = &na->rx_rings[lim_rx];
1984 if (check_all_rx
1985 && (netmap_fwd || kring->ring->flags & NR_FORWARD)) {
1986 /* XXX fix to use kring fields */
1987 if (nm_ring_empty(kring->ring))
1988 send_down = netmap_rxsync_from_host(na, td, dev);
1989 if (!nm_ring_empty(kring->ring))
1990 revents |= want_rx;
1991 }
1992
1993 if (retry_rx)
1951 selrecord(td, check_all_rx ?
1952 &na->rx_si : &na->rx_rings[priv->np_qfirst].si);
1994 selrecord(td, check_all_rx ?
1995 &na->rx_si : &na->rx_rings[priv->np_qfirst].si);
1953 goto do_retry_rx;
1996 if (send_down > 0 || retry_rx) {
1997 retry_rx = 0;
1998 if (send_down)
1999 goto flush_tx; /* and retry_rx */
2000 else
2001 goto do_retry_rx;
1954 }
1955 }
1956
2002 }
2003 }
2004
1957 /* forward host to the netmap ring.
1958 * I am accessing nr_hwavail without lock, but netmap_transmit
1959 * can only increment it, so the operation is safe.
2005 /*
2006 * Transparent mode: marked bufs on rx rings between
2007 * kring->nr_hwcur and ring->head
2008 * are passed to the other endpoint.
2009 *
2010 * In this mode we also scan the sw rxring, which in
2011 * turn passes packets up.
2012 *
2013 * XXX Transparent mode at the moment requires binding all
2014 * rings to a single file descriptor.
1960 */
2015 */
1961 kring = &na->rx_rings[lim_rx];
1962 if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
1963 && (netmap_fwd || kring->ring->flags & NR_FORWARD)
1964 && kring->nr_hwavail > 0 && !host_forwarded) {
1965 netmap_sw_to_nic(na);
1966 host_forwarded = 1; /* prevent another pass */
1967 want_rx = 0;
1968 goto flush_tx;
1969 }
1970
1971 if (q.head)
1972 netmap_send_up(na->ifp, &q);
1973
2016
2017 if (q.head)
2018 netmap_send_up(na->ifp, &q);
2019
1974out:
1975
1976 return (revents);
1977}
1978
2020 return (revents);
2021}
2022
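[Editorial note, not part of either revision: the hunks above stop trusting the user-writable ring->avail field and test readiness through cur/tail instead (nm_ring_empty(), kring->rcur != kring->rtail). A minimal sketch of that computation, along the lines of the nm_ring_space() helper the new API exposes in netmap_user.h; the function name below is illustrative.]

	static inline uint32_t
	example_ring_space(const struct netmap_ring *ring)
	{
		/* slots between cur (next to be consumed) and tail (first busy) */
		int space = (int)ring->tail - (int)ring->cur;

		if (space < 0)			/* tail wrapped past cur */
			space += ring->num_slots;
		return ((uint32_t)space);	/* 0 is the nm_ring_empty() case */
	}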
1979/*------- driver support routines ------*/
1980
2023
2024/*-------------------- driver support routines -------------------*/
2025
1981static int netmap_hw_krings_create(struct netmap_adapter *);
1982
1983static int
2026static int netmap_hw_krings_create(struct netmap_adapter *);
2027
2028static int
1984netmap_notify(struct netmap_adapter *na, u_int n_ring, enum txrx tx, int flags)
2029netmap_notify(struct netmap_adapter *na, u_int n_ring,
2030 enum txrx tx, int flags)
1985{
1986 struct netmap_kring *kring;
1987
1988 if (tx == NR_TX) {
1989 kring = na->tx_rings + n_ring;
1990 selwakeuppri(&kring->si, PI_NET);
1991 if (flags & NAF_GLOBAL_NOTIFY)
1992 selwakeuppri(&na->tx_si, PI_NET);

--- 14 unchanged lines hidden (view full) ---

2007 struct ifnet *ifp = na->ifp;
2008
2009 if (na->num_tx_rings == 0 || na->num_rx_rings == 0) {
2010 D("%s: invalid rings tx %d rx %d",
2011 ifp->if_xname, na->num_tx_rings, na->num_rx_rings);
2012 return EINVAL;
2013 }
2014 WNA(ifp) = na;
2031{
2032 struct netmap_kring *kring;
2033
2034 if (tx == NR_TX) {
2035 kring = na->tx_rings + n_ring;
2036 selwakeuppri(&kring->si, PI_NET);
2037 if (flags & NAF_GLOBAL_NOTIFY)
2038 selwakeuppri(&na->tx_si, PI_NET);

--- 14 unchanged lines hidden (view full) ---

2053 struct ifnet *ifp = na->ifp;
2054
2055 if (na->num_tx_rings == 0 || na->num_rx_rings == 0) {
2056 D("%s: invalid rings tx %d rx %d",
2057 ifp->if_xname, na->num_tx_rings, na->num_rx_rings);
2058 return EINVAL;
2059 }
2060 WNA(ifp) = na;
2061
2062 /* the following is only needed for na that use the host port.
2063 * XXX do we have something similar for linux ?
2064 */
2065#ifdef __FreeBSD__
2066 na->if_input = ifp->if_input; /* for netmap_send_up */
2067#endif /* __FreeBSD__ */
2068
2015 NETMAP_SET_CAPABLE(ifp);
2016 if (na->nm_krings_create == NULL) {
2017 na->nm_krings_create = netmap_hw_krings_create;
2069 NETMAP_SET_CAPABLE(ifp);
2070 if (na->nm_krings_create == NULL) {
2071 na->nm_krings_create = netmap_hw_krings_create;
2018 na->nm_krings_delete = netmap_krings_delete;
2072 na->nm_krings_delete = netmap_hw_krings_delete;
2019 }
2020 if (na->nm_notify == NULL)
2021 na->nm_notify = netmap_notify;
2022 na->active_fds = 0;
2023
2024 if (na->nm_mem == NULL)
2025 na->nm_mem = &nm_mem;
2026 return 0;

--- 19 unchanged lines hidden (view full) ---

2046
2047/*
2048 * Initialize a ``netmap_adapter`` object created by driver on attach.
2049 * We allocate a block of memory with room for a struct netmap_adapter
2050 * plus two sets of N+2 struct netmap_kring (where N is the number
2051 * of hardware rings):
2052 * krings 0..N-1 are for the hardware queues.
2053 * kring N is for the host stack queue
2073 }
2074 if (na->nm_notify == NULL)
2075 na->nm_notify = netmap_notify;
2076 na->active_fds = 0;
2077
2078 if (na->nm_mem == NULL)
2079 na->nm_mem = &nm_mem;
2080 return 0;

--- 19 unchanged lines hidden (view full) ---

2100
2101/*
2102 * Initialize a ``netmap_adapter`` object created by driver on attach.
2103 * We allocate a block of memory with room for a struct netmap_adapter
2104 * plus two sets of N+2 struct netmap_kring (where N is the number
2105 * of hardware rings):
2106 * krings 0..N-1 are for the hardware queues.
2107 * kring N is for the host stack queue
2054 * kring N+1 is only used for the selinfo for all queues.
2108 * kring N+1 is only used for the selinfo for all queues. // XXX still true ?
2055 * Return 0 on success, ENOMEM otherwise.
2109 * Return 0 on success, ENOMEM otherwise.
2056 *
2057 * By default the receive and transmit adapter ring counts are both initialized
2058 * to num_queues. na->num_tx_rings can be set for cards with different tx/rx
2059 * setups.
2060 */
2061int
2062netmap_attach(struct netmap_adapter *arg)
2063{
2064 struct netmap_hw_adapter *hwna = NULL;
2065 // XXX when is arg == NULL ?
2066 struct ifnet *ifp = arg ? arg->ifp : NULL;
2067

--- 59 unchanged lines hidden (view full) ---

2127
2128 return 1;
2129}
2130
2131
2132int
2133netmap_hw_krings_create(struct netmap_adapter *na)
2134{
2110 */
2111int
2112netmap_attach(struct netmap_adapter *arg)
2113{
2114 struct netmap_hw_adapter *hwna = NULL;
2115 // XXX when is arg == NULL ?
2116 struct ifnet *ifp = arg ? arg->ifp : NULL;
2117

--- 59 unchanged lines hidden (view full) ---

2177
2178 return 1;
2179}
2180
2181
2182int
2183netmap_hw_krings_create(struct netmap_adapter *na)
2184{
2135 return netmap_krings_create(na,
2185 int ret = netmap_krings_create(na,
2136 na->num_tx_rings + 1, na->num_rx_rings + 1, 0);
2186 na->num_tx_rings + 1, na->num_rx_rings + 1, 0);
2187 if (ret == 0) {
2188 /* initialize the mbq for the sw rx ring */
2189 mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue);
2190 ND("initialized sw rx queue %d", na->num_rx_rings);
2191 }
2192 return ret;
2137}
2138
2139
2140
2141/*
2142 * Free the allocated memory linked to the given ``netmap_adapter``
2143 * object.
2144 */

--- 12 unchanged lines hidden (view full) ---

2157 netmap_enable_all_rings(ifp);
2158 NMG_UNLOCK();
2159}
2160
2161
2162/*
2163 * Intercept packets from the network stack and pass them
2164 * to netmap as incoming packets on the 'software' ring.
2193}
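[Editorial note, not part of either revision: netmap_hw_krings_create() above now also initializes the mbq of the software ring, which lives one slot past the hardware rx rings. A small sketch of the indexing convention this file relies on; the helper name is illustrative only.]

	static inline struct netmap_kring *
	example_host_rx_kring(struct netmap_adapter *na)
	{
		/* hardware rx krings sit at indexes 0 .. num_rx_rings-1;
		 * the next entry is the host-stack ("software") ring used by
		 * netmap_transmit() and the rxsync-from-host path.
		 */
		return (&na->rx_rings[na->num_rx_rings]);
	}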
2194
2195
2196
2197/*
2198 * Free the allocated memory linked to the given ``netmap_adapter``
2199 * object.
2200 */

--- 12 unchanged lines hidden (view full) ---

2213 netmap_enable_all_rings(ifp);
2214 NMG_UNLOCK();
2215}
2216
2217
2218/*
2219 * Intercept packets from the network stack and pass them
2220 * to netmap as incoming packets on the 'software' ring.
2221 *
2222 * We only store packets in a bounded mbq and then copy them
2223 * into the netmap ring in the relevant rxsync routine.
2224 *
2165 * We rely on the OS to make sure that the ifp and na do not go
2166 * away (typically the caller checks for IFF_DRV_RUNNING or the like).
2167 * In nm_register() or whenever there is a reinitialization,
2168 * we make sure to make the mode change visible here.
2169 */
2170int
2171netmap_transmit(struct ifnet *ifp, struct mbuf *m)
2172{
2173 struct netmap_adapter *na = NA(ifp);
2174 struct netmap_kring *kring;
2225 * We rely on the OS to make sure that the ifp and na do not go
2226 * away (typically the caller checks for IFF_DRV_RUNNING or the like).
2227 * In nm_register() or whenever there is a reinitialization,
2228 * we make sure to make the mode change visible here.
2229 */
2230int
2231netmap_transmit(struct ifnet *ifp, struct mbuf *m)
2232{
2233 struct netmap_adapter *na = NA(ifp);
2234 struct netmap_kring *kring;
2175 u_int i, len = MBUF_LEN(m);
2176 u_int error = EBUSY, lim;
2177 struct netmap_slot *slot;
2235 u_int len = MBUF_LEN(m);
2236 u_int error = ENOBUFS;
2237 struct mbq *q;
2238 int space;
2178
2179 // XXX [Linux] we do not need this lock
2180 // if we follow the down/configure/up protocol -gl
2181 // mtx_lock(&na->core_lock);
2239
2240 // XXX [Linux] we do not need this lock
2241 // if we follow the down/configure/up protocol -gl
2242 // mtx_lock(&na->core_lock);
2243
2182 if ( (ifp->if_capenable & IFCAP_NETMAP) == 0) {
2244 if ( (ifp->if_capenable & IFCAP_NETMAP) == 0) {
2183 /* interface not in netmap mode anymore */
2245 D("%s not in netmap mode anymore", NM_IFPNAME(ifp));
2184 error = ENXIO;
2185 goto done;
2186 }
2187
2188 kring = &na->rx_rings[na->num_rx_rings];
2246 error = ENXIO;
2247 goto done;
2248 }
2249
2250 kring = &na->rx_rings[na->num_rx_rings];
2189 lim = kring->nkr_num_slots - 1;
2190 if (netmap_verbose & NM_VERB_HOST)
2191 D("%s packet %d len %d from the stack", NM_IFPNAME(ifp),
2192 kring->nr_hwcur + kring->nr_hwavail, len);
2251 q = &kring->rx_queue;
2252
2193 // XXX reconsider long packets if we handle fragments
2194 if (len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { /* too long for us */
2195 D("%s from_host, drop packet size %d > %d", NM_IFPNAME(ifp),
2196 len, NETMAP_BDG_BUF_SIZE(na->nm_mem));
2197 goto done;
2198 }
2253 // XXX reconsider long packets if we handle fragments
2254 if (len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { /* too long for us */
2255 D("%s from_host, drop packet size %d > %d", NM_IFPNAME(ifp),
2256 len, NETMAP_BDG_BUF_SIZE(na->nm_mem));
2257 goto done;
2258 }
2199 /* protect against other instances of netmap_transmit,
2200 * and userspace invocations of rxsync().
2259
2260 /* protect against rxsync_from_host(), netmap_sw_to_nic()
2261 * and maybe other instances of netmap_transmit (the latter
2262 * not possible on Linux).
2263 * Also avoid overflowing the queue.
2201 */
2264 */
2202 // XXX [Linux] there can be no other instances of netmap_transmit
2203 // on this same ring, but we still need this lock to protect
2204 // concurrent access from netmap_sw_to_nic() -gl
2205 mtx_lock(&kring->q_lock);
2206 if (kring->nr_hwavail >= lim) {
2207 if (netmap_verbose)
2208 D("stack ring %s full\n", NM_IFPNAME(ifp));
2265 mtx_lock(&q->lock);
2266
2267 space = kring->nr_hwtail - kring->nr_hwcur;
2268 if (space < 0)
2269 space += kring->nkr_num_slots;
2270 if (space + mbq_len(q) >= kring->nkr_num_slots - 1) { // XXX
2271 RD(10, "%s full hwcur %d hwtail %d qlen %d len %d m %p",
2272 NM_IFPNAME(ifp), kring->nr_hwcur, kring->nr_hwtail, mbq_len(q),
2273 len, m);
2209 } else {
2274 } else {
2210 /* compute the insert position */
2211 i = nm_kr_rxpos(kring);
2212 slot = &kring->ring->slot[i];
2213 m_copydata(m, 0, (int)len, BDG_NMB(na, slot));
2214 slot->len = len;
2215 slot->flags = kring->nkr_slot_flags;
2216 kring->nr_hwavail++;
2217 if (netmap_verbose & NM_VERB_HOST)
2218 D("wake up host ring %s %d", NM_IFPNAME(na->ifp), na->num_rx_rings);
2219 na->nm_notify(na, na->num_rx_rings, NR_RX, 0);
2275 mbq_enqueue(q, m);
2276 ND(10, "%s %d bufs in queue len %d m %p",
2277 NM_IFPNAME(ifp), mbq_len(q), len, m);
2278 /* notify outside the lock */
2279 m = NULL;
2220 error = 0;
2221 }
2280 error = 0;
2281 }
2222 mtx_unlock(&kring->q_lock);
2282 mtx_unlock(&q->lock);
2223
2224done:
2283
2284done:
2225 // mtx_unlock(&na->core_lock);
2285 if (m)
2286 m_freem(m);
2287 /* unconditionally wake up listeners */
2288 na->nm_notify(na, na->num_rx_rings, NR_RX, 0);
2226
2289
2227 /* release the mbuf in either cases of success or failure. As an
2228 * alternative, put the mbuf in a free list and free the list
2229 * only when really necessary.
2230 */
2231 m_freem(m);
2232
2233 return (error);
2234}
2235
2236
2237/*
2238 * netmap_reset() is called by the driver routines when reinitializing
2239 * a ring. The driver is in charge of locking to protect the kring.
2240 * If native netmap mode is not set just return NULL.

--- 21 unchanged lines hidden (view full) ---

2262 * - set a RESET flag somewhere in the kring
2263 * - do the processing in a conservative way
2264 * - let the *sync() fixup at the end.
2265 */
2266 if (tx == NR_TX) {
2267 if (n >= na->num_tx_rings)
2268 return NULL;
2269 kring = na->tx_rings + n;
2290 return (error);
2291}
2292
2293
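[Editorial note, not part of either revision: a worked example of the occupancy test in netmap_transmit() above. Slots already held by the host ring are hwtail - hwcur taken modulo the ring size, and the mbufs parked in the mbq will each need a slot too; the numbers below are made up.]

	#include <stdio.h>

	int
	main(void)
	{
		int num_slots = 256, hwcur = 250, hwtail = 10, qlen = 100;
		int space = hwtail - hwcur;		/* -240 */

		if (space < 0)
			space += num_slots;		/* 16 slots already carry stack traffic */
		/* same bound as above: refuse the mbuf if ring + queue would overflow */
		if (space + qlen >= num_slots - 1)
			printf("drop: host ring would overflow\n");
		else
			printf("enqueue: %d slots busy, %d mbufs queued\n", space, qlen);
		return (0);
	}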
2294/*
2295 * netmap_reset() is called by the driver routines when reinitializing
2296 * a ring. The driver is in charge of locking to protect the kring.
2297 * If native netmap mode is not set just return NULL.

--- 21 unchanged lines hidden (view full) ---

2319 * - set a RESET flag somewhere in the kring
2320 * - do the processing in a conservative way
2321 * - let the *sync() fixup at the end.
2322 */
2323 if (tx == NR_TX) {
2324 if (n >= na->num_tx_rings)
2325 return NULL;
2326 kring = na->tx_rings + n;
2327 // XXX check whether we should use hwcur or rcur
2270 new_hwofs = kring->nr_hwcur - new_cur;
2271 } else {
2272 if (n >= na->num_rx_rings)
2273 return NULL;
2274 kring = na->rx_rings + n;
2328 new_hwofs = kring->nr_hwcur - new_cur;
2329 } else {
2330 if (n >= na->num_rx_rings)
2331 return NULL;
2332 kring = na->rx_rings + n;
2275 new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur;
2333 new_hwofs = kring->nr_hwtail - new_cur;
2276 }
2277 lim = kring->nkr_num_slots - 1;
2278 if (new_hwofs > lim)
2279 new_hwofs -= lim + 1;
2280
2281 /* Always set the new offset value and realign the ring. */
2334 }
2335 lim = kring->nkr_num_slots - 1;
2336 if (new_hwofs > lim)
2337 new_hwofs -= lim + 1;
2338
2339 /* Always set the new offset value and realign the ring. */
2282 D("%s hwofs %d -> %d, hwavail %d -> %d",
2283 tx == NR_TX ? "TX" : "RX",
2340 if (netmap_verbose)
2341 D("%s %s%d hwofs %d -> %d, hwtail %d -> %d",
2342 NM_IFPNAME(na->ifp),
2343 tx == NR_TX ? "TX" : "RX", n,
2284 kring->nkr_hwofs, new_hwofs,
2344 kring->nkr_hwofs, new_hwofs,
2285 kring->nr_hwavail,
2286 tx == NR_TX ? lim : kring->nr_hwavail);
2345 kring->nr_hwtail,
2346 tx == NR_TX ? lim : kring->nr_hwtail);
2287 kring->nkr_hwofs = new_hwofs;
2347 kring->nkr_hwofs = new_hwofs;
2288 if (tx == NR_TX)
2289 kring->nr_hwavail = lim;
2290 kring->nr_hwreserved = 0;
2348 if (tx == NR_TX) {
2349 kring->nr_hwtail = kring->nr_hwcur + lim;
2350 if (kring->nr_hwtail > lim)
2351 kring->nr_hwtail -= lim + 1;
2352 }
2291
2292#if 0 // def linux
2293 /* XXX check that the mappings are correct */
2294 /* need ring_nr, adapter->pdev, direction */
2295 buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE);
2296 if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
2297 D("error mapping rx netmap buffer %d", i);
2298 // XXX fix error handling

--- 47 unchanged lines hidden (view full) ---

2346 if (q >= na->num_tx_rings)
2347 return; // not a physical queue
2348 kring = na->tx_rings + q;
2349 na->nm_notify(na, q, NR_TX,
2350 (na->num_tx_rings > 1 ? NAF_GLOBAL_NOTIFY : 0));
2351 }
2352}
2353
2353
2354#if 0 // def linux
2355 /* XXX check that the mappings are correct */
2356 /* need ring_nr, adapter->pdev, direction */
2357 buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE);
2358 if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
2359 D("error mapping rx netmap buffer %d", i);
2360 // XXX fix error handling

--- 47 unchanged lines hidden (view full) ---

2408 if (q >= na->num_tx_rings)
2409 return; // not a physical queue
2410 kring = na->tx_rings + q;
2411 na->nm_notify(na, q, NR_TX,
2412 (na->num_tx_rings > 1 ? NAF_GLOBAL_NOTIFY : 0));
2413 }
2414}
2415
2416
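[Editorial note, not part of either revision: the comment below describes the generic rx/tx interrupt hooks. A rough sketch of how a driver's rx cleanup routine would hand off to them, assuming the netmap_rx_irq() prototype from netmap_kern.h; the function and its arguments are illustrative.]

	#ifdef DEV_NETMAP
	/* called from the driver's rx interrupt / cleanup path */
	static int
	example_driver_rxeof(struct ifnet *ifp, u_int ring_nr)
	{
		u_int work_done = 0;

		if (netmap_rx_irq(ifp, ring_nr, &work_done))
			return (work_done);	/* ring is in netmap mode, nothing left to do */
		/* ... otherwise fall through to the regular mbuf receive path ... */
		return (0);
	}
	#endif /* DEV_NETMAP */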
2354/*
2355 * Default functions to handle rx/tx interrupts from a physical device.
2356 * "work_done" is non-null on the RX path, NULL for the TX path.
2357 *
2358 * If the card is not in netmap mode, simply return 0,
2359 * so that the caller proceeds with regular processing.
2360 * Otherwise call netmap_common_irq() and return 1.
2361 *

--- 30 unchanged lines hidden (view full) ---

2392 * (but there is no chance)
2393 *
2394 * netmap_fini() destroys everything.
2395 */
2396
2397static struct cdev *netmap_dev; /* /dev/netmap character device. */
2398extern struct cdevsw netmap_cdevsw;
2399
2417/*
2418 * Default functions to handle rx/tx interrupts from a physical device.
2419 * "work_done" is non-null on the RX path, NULL for the TX path.
2420 *
2421 * If the card is not in netmap mode, simply return 0,
2422 * so that the caller proceeds with regular processing.
2423 * Otherwise call netmap_common_irq() and return 1.
2424 *

--- 30 unchanged lines hidden (view full) ---

2455 * (but there is no chance)
2456 *
2457 * netmap_fini() destroys everything.
2458 */
2459
2460static struct cdev *netmap_dev; /* /dev/netmap character device. */
2461extern struct cdevsw netmap_cdevsw;
2462
2463
2400void
2401netmap_fini(void)
2402{
2403 // XXX destroy_bridges() ?
2404 if (netmap_dev)
2405 destroy_dev(netmap_dev);
2406 netmap_mem_fini();
2407 NMG_LOCK_DESTROY();
2408 printf("netmap: unloaded module.\n");
2409}
2410
2464void
2465netmap_fini(void)
2466{
2467 // XXX destroy_bridges() ?
2468 if (netmap_dev)
2469 destroy_dev(netmap_dev);
2470 netmap_mem_fini();
2471 NMG_LOCK_DESTROY();
2472 printf("netmap: unloaded module.\n");
2473}
2474
2475
2411int
2412netmap_init(void)
2413{
2414 int error;
2415
2416 NMG_LOCK_INIT();
2417
2418 error = netmap_mem_init();

--- 15 unchanged lines hidden ---
2476int
2477netmap_init(void)
2478{
2479 int error;
2480
2481 NMG_LOCK_INIT();
2482
2483 error = netmap_mem_init();

--- 15 unchanged lines hidden ---