/*
 * Copyright (C) 2016-2018 Vincenzo Maffione
 * Copyright (C) 2015 Stefano Garzarella
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/dev/netmap/netmap_kloop.c 343866 2019-02-07 10:44:53Z vmaffione $
 */

/*
 * common headers
 */
#if defined(__FreeBSD__)
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/selinfo.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h>

#define usleep_range(_1, _2) \
        pause_sbt("sync-kloop-sleep", SBT_1US * _1, SBT_1US * 1, C_ABSOLUTE)

#elif defined(linux)
#include <bsd_glue.h>
#include <linux/file.h>
#include <linux/eventfd.h>
#endif

#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <net/netmap_virt.h>
#include <dev/netmap/netmap_mem2.h>

/* Support for eventfd-based notifications. */
#if defined(linux)
#define SYNC_KLOOP_POLL
#endif
/* Write kring pointers (hwcur, hwtail) to the CSB.
 * This routine is coupled with ptnetmap_guest_read_kring_csb(). */
static inline void
sync_kloop_kernel_write(struct nm_csb_ktoa __user *ptr, uint32_t hwcur,
			   uint32_t hwtail)
{
	/* Issue a first store-store barrier to make sure that writes to the
	 * netmap ring are not reordered after the updates of ktoa->hwcur and
	 * ktoa->hwtail. */
	nm_stst_barrier();

	/*
	 * The same scheme used in nm_sync_kloop_appl_write() applies here.
	 * We allow the application to read a value of hwcur more recent than
	 * the value of hwtail, since this still results in a consistent view
	 * of the ring state (hwcur can never wrap around past hwtail, because
	 * hwcur must stay behind head).
	 *
	 * The following memory barrier scheme is used to make this happen:
	 *
	 *          Kernel                 Application
	 *
	 *          STORE(hwcur)           LOAD(hwtail)
	 *          wmb() <------------->  rmb()
	 *          STORE(hwtail)          LOAD(hwcur)
	 */
	CSB_WRITE(ptr, hwcur, hwcur);
	nm_stst_barrier();
	CSB_WRITE(ptr, hwtail, hwtail);
}

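/*
 * For illustration only (not compiled): the application-side counterpart,
 * nm_sync_kloop_appl_read() from net/netmap_virt.h, is expected to load the
 * two fields in the opposite order, with a load-load barrier in between,
 * along the lines of:
 *
 *	hwtail = ktoa->hwtail;
 *	nm_ldld_barrier();
 *	hwcur = ktoa->hwcur;
 *
 * so that the application may observe a hwcur newer than hwtail, but never
 * the other way around, which keeps its view of the ring consistent.
 */
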
/* Read kring pointers (head, cur, sync_flags) from the CSB.
 * This routine is coupled with ptnetmap_guest_write_kring_csb(). */
static inline void
sync_kloop_kernel_read(struct nm_csb_atok __user *ptr,
			  struct netmap_ring *shadow_ring,
			  uint32_t num_slots)
{
	/*
	 * We place a memory barrier to make sure that the value of head we
	 * load is never more recent than the value of cur
	 * (see the explanation in sync_kloop_kernel_write()).
	 */
	CSB_READ(ptr, head, shadow_ring->head);
	nm_ldld_barrier();
	CSB_READ(ptr, cur, shadow_ring->cur);
	CSB_READ(ptr, sync_flags, shadow_ring->flags);

	/* Make sure that loads from atok->head and atok->cur are not delayed
	 * after the loads from the netmap ring. */
	nm_ldld_barrier();
}

/* Enable or disable application --> kernel kicks. */
static inline void
csb_ktoa_kick_enable(struct nm_csb_ktoa __user *csb_ktoa, uint32_t val)
{
	CSB_WRITE(csb_ktoa, kern_need_kick, val);
}

#ifdef SYNC_KLOOP_POLL
/* Are application interrupts enabled or disabled? */
static inline uint32_t
csb_atok_intr_enabled(struct nm_csb_atok __user *csb_atok)
{
	uint32_t v;

	CSB_READ(csb_atok, appl_need_kick, v);

	return v;
}
#endif  /* SYNC_KLOOP_POLL */

static inline void
sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring)
{
	nm_prinf("%s, kring %s, hwcur %d, rhead %d, "
		"rcur %d, rtail %d, hwtail %d",
		title, kring->name, kring->nr_hwcur, kring->rhead,
		kring->rcur, kring->rtail, kring->nr_hwtail);
}

/* Arguments for netmap_sync_kloop_tx_ring() and
 * netmap_sync_kloop_rx_ring().
 */
struct sync_kloop_ring_args {
	struct netmap_kring *kring;
	struct nm_csb_atok *csb_atok;
	struct nm_csb_ktoa *csb_ktoa;
#ifdef SYNC_KLOOP_POLL
	struct eventfd_ctx *irq_ctx;
#endif /* SYNC_KLOOP_POLL */
	/* Are we busy waiting rather than using a schedule() loop? */
	bool busy_wait;
	/* Are we processing in the context of a VM exit? */
	bool direct;
};

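/*
 * For illustration only: netmap_sync_kloop() below fills one instance of
 * this structure per bound ring; for the i-th TX ring it roughly does:
 *
 *	a->kring     = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]];
 *	a->csb_atok  = csb_atok_base + i;
 *	a->csb_ktoa  = csb_ktoa_base + i;
 *	a->busy_wait = busy_wait;
 *	a->direct    = direct_tx;
 *
 * RX rings use the CSB entries that follow the num_tx_rings TX entries.
 */
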
static void
netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a)
{
	struct netmap_kring *kring = a->kring;
	struct nm_csb_atok *csb_atok = a->csb_atok;
	struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
	struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
	bool more_txspace = false;
	uint32_t num_slots;
	int batch;

	if (unlikely(nm_kr_tryget(kring, 1, NULL))) {
		return;
	}

	num_slots = kring->nkr_num_slots;

	/* Disable application --> kernel notifications. */
	if (!a->direct) {
		csb_ktoa_kick_enable(csb_ktoa, 0);
	}
	/* Copy the application kring pointers from the CSB */
	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);

	for (;;) {
		batch = shadow_ring.head - kring->nr_hwcur;
		if (batch < 0)
			batch += num_slots;

#ifdef PTN_TX_BATCH_LIM
		if (batch > PTN_TX_BATCH_LIM(num_slots)) {
			/* If the application moves ahead too fast, cut the move
			 * so that we don't exceed our batch limit. */
			uint32_t head_lim = kring->nr_hwcur + PTN_TX_BATCH_LIM(num_slots);

			if (head_lim >= num_slots)
				head_lim -= num_slots;
			nm_prdis(1, "batch: %d head: %d head_lim: %d", batch, shadow_ring.head,
					head_lim);
			shadow_ring.head = head_lim;
			batch = PTN_TX_BATCH_LIM(num_slots);
		}
#endif /* PTN_TX_BATCH_LIM */

		if (nm_kr_txspace(kring) <= (num_slots >> 1)) {
			shadow_ring.flags |= NAF_FORCE_RECLAIM;
		}

		/* Netmap prologue */
		shadow_ring.tail = kring->rtail;
		if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) {
			/* Reinit ring and enable notifications. */
			netmap_ring_reinit(kring);
			if (!a->busy_wait) {
				csb_ktoa_kick_enable(csb_ktoa, 1);
			}
			break;
		}

		if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
			sync_kloop_kring_dump("pre txsync", kring);
		}

		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
			if (!a->busy_wait) {
				/* Reenable notifications. */
				csb_ktoa_kick_enable(csb_ktoa, 1);
			}
			nm_prerr("txsync() failed");
			break;
		}

		/*
		 * Finalize.
		 * Copy kernel hwcur and hwtail into the CSB for the application sync(), and
		 * do the nm_sync_finalize.
		 */
		sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur,
				kring->nr_hwtail);
		if (kring->rtail != kring->nr_hwtail) {
			/* Some more room available in the parent adapter. */
			kring->rtail = kring->nr_hwtail;
			more_txspace = true;
		}

		if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
			sync_kloop_kring_dump("post txsync", kring);
		}

		/* Interrupt the application if needed. */
#ifdef SYNC_KLOOP_POLL
		if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
			/* We could disable kernel --> application kicks here,
			 * to avoid spurious interrupts. */
			eventfd_signal(a->irq_ctx, 1);
			more_txspace = false;
		}
#endif /* SYNC_KLOOP_POLL */

		/* Read CSB to see if there is more work to do. */
		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
		if (shadow_ring.head == kring->rhead) {
			if (a->busy_wait) {
				break;
			}
			/*
			 * No more packets to transmit. We enable notifications and
			 * go to sleep, waiting for a kick from the application when
			 * new slots are ready for transmission.
			 */
			/* Reenable notifications. */
			csb_ktoa_kick_enable(csb_ktoa, 1);
			/* Double check, with store-load memory barrier. */
			nm_stld_barrier();
			sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
			if (shadow_ring.head != kring->rhead) {
				/* We won the race condition, there are more packets to
				 * transmit. Disable notifications and do another cycle. */
				csb_ktoa_kick_enable(csb_ktoa, 0);
				continue;
			}
			break;
		}

		if (nm_kr_txempty(kring)) {
			/* No more available TX slots. We stop and wait for a
			 * notification from the backend (netmap_tx_irq). */
			nm_prdis(1, "TX ring");
			break;
		}
	}

	nm_kr_put(kring);

#ifdef SYNC_KLOOP_POLL
	if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
		eventfd_signal(a->irq_ctx, 1);
	}
#endif /* SYNC_KLOOP_POLL */
}

/* Maximum number of consecutive RX cycles without receiving any packet. */
#define SYNC_LOOP_RX_DRY_CYCLES_MAX	2

static inline int
sync_kloop_norxslots(struct netmap_kring *kring, uint32_t g_head)
{
	return (NM_ACCESS_ONCE(kring->nr_hwtail) == nm_prev(g_head,
				kring->nkr_num_slots - 1));
}

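/*
 * Example: with nkr_num_slots == 256 and an application head (g_head) of 10,
 * the kernel has run out of receive slots when nr_hwtail == 9, i.e. hwtail
 * has caught up with the slot right before the application head. In that
 * case the RX loop below stops and waits for the application to release
 * more buffers.
 */
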
static void
netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a)
{
	struct netmap_kring *kring = a->kring;
	struct nm_csb_atok *csb_atok = a->csb_atok;
	struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
	struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
	int dry_cycles = 0;
	bool some_recvd = false;
	uint32_t num_slots;

	if (unlikely(nm_kr_tryget(kring, 1, NULL))) {
		return;
	}

	num_slots = kring->nkr_num_slots;

	/* Disable notifications. */
	if (!a->direct) {
		csb_ktoa_kick_enable(csb_ktoa, 0);
	}
	/* Copy the application kring pointers from the CSB */
	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);

	for (;;) {
		uint32_t hwtail;

		/* Netmap prologue */
		shadow_ring.tail = kring->rtail;
		if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) {
			/* Reinit ring and enable notifications. */
			netmap_ring_reinit(kring);
			if (!a->busy_wait) {
				csb_ktoa_kick_enable(csb_ktoa, 1);
			}
			break;
		}

		if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
			sync_kloop_kring_dump("pre rxsync", kring);
		}

		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
			if (!a->busy_wait) {
				/* Reenable notifications. */
				csb_ktoa_kick_enable(csb_ktoa, 1);
			}
			nm_prerr("rxsync() failed");
			break;
		}

		/*
		 * Finalize.
		 * Copy kernel hwcur and hwtail into the CSB for the application sync().
		 */
		hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
		sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, hwtail);
		if (kring->rtail != hwtail) {
			kring->rtail = hwtail;
			some_recvd = true;
			dry_cycles = 0;
		} else {
			dry_cycles++;
		}

		if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
			sync_kloop_kring_dump("post rxsync", kring);
		}

#ifdef SYNC_KLOOP_POLL
		/* Interrupt the application if needed. */
		if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
			/* We could disable kernel --> application kicks here,
			 * to avoid spurious interrupts. */
			eventfd_signal(a->irq_ctx, 1);
			some_recvd = false;
		}
#endif /* SYNC_KLOOP_POLL */

		/* Read CSB to see if there is more work to do. */
		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
		if (sync_kloop_norxslots(kring, shadow_ring.head)) {
			if (a->busy_wait) {
				break;
			}
			/*
			 * No more slots available for reception. We enable
			 * notifications and go to sleep, waiting for a kick from
			 * the application when new receive slots are available.
			 */
			/* Reenable notifications. */
			csb_ktoa_kick_enable(csb_ktoa, 1);
			/* Double check, with store-load memory barrier. */
			nm_stld_barrier();
			sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
			if (!sync_kloop_norxslots(kring, shadow_ring.head)) {
				/* We won the race condition, more slots are available. Disable
				 * notifications and do another cycle. */
				csb_ktoa_kick_enable(csb_ktoa, 0);
				continue;
			}
			break;
		}

		hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
		if (unlikely(hwtail == kring->rhead ||
					dry_cycles >= SYNC_LOOP_RX_DRY_CYCLES_MAX)) {
			/* No more packets to be read from the backend. We stop and
			 * wait for a notification from the backend (netmap_rx_irq). */
			nm_prdis(1, "nr_hwtail: %d rhead: %d dry_cycles: %d",
					hwtail, kring->rhead, dry_cycles);
			break;
		}
	}

	nm_kr_put(kring);

#ifdef SYNC_KLOOP_POLL
	/* Interrupt the application if needed. */
	if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
		eventfd_signal(a->irq_ctx, 1);
	}
#endif /* SYNC_KLOOP_POLL */
}

#ifdef SYNC_KLOOP_POLL
struct sync_kloop_poll_ctx;
struct sync_kloop_poll_entry {
	/* Support for receiving notifications from
	 * a netmap ring or from the application. */
	struct file *filp;
	wait_queue_t wait;
	wait_queue_head_t *wqh;

	/* Support for sending notifications to the application. */
	struct eventfd_ctx *irq_ctx;
	struct file *irq_filp;

	/* Arguments for the ring processing function. Useful
	 * in case of custom wake-up function. */
	struct sync_kloop_ring_args *args;
	struct sync_kloop_poll_ctx *parent;
};

struct sync_kloop_poll_ctx {
	poll_table wait_table;
	unsigned int next_entry;
	int (*next_wake_fun)(wait_queue_t *, unsigned, int, void *);
	unsigned int num_entries;
	unsigned int num_tx_rings;
	unsigned int num_rings;
	/* The first num_tx_rings entries are for the TX kicks.
	 * Then the RX kick entries follow. The last two
	 * entries are for the TX irq and the RX irq. */
	struct sync_kloop_poll_entry entries[0];
};

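/*
 * Layout example, assuming 2 TX rings and 2 RX rings (num_rings == 4),
 * following the registration order used in netmap_sync_kloop():
 *
 *	entries[0..1]	application --> kernel TX kicks (ioeventfds)
 *	entries[2..3]	application --> kernel RX kicks (ioeventfds)
 *	entries[4]	notifications from the netmap TX rings (np_si[NR_TX])
 *	entries[5]	notifications from the netmap RX rings (np_si[NR_RX])
 */
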
static void
sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh,
				poll_table *pt)
{
	struct sync_kloop_poll_ctx *poll_ctx =
		container_of(pt, struct sync_kloop_poll_ctx, wait_table);
	struct sync_kloop_poll_entry *entry = poll_ctx->entries +
						poll_ctx->next_entry;

	BUG_ON(poll_ctx->next_entry >= poll_ctx->num_entries);
	entry->wqh = wqh;
	entry->filp = file;
	/* Use the default wake up function. */
	if (poll_ctx->next_wake_fun == NULL) {
		init_waitqueue_entry(&entry->wait, current);
	} else {
		init_waitqueue_func_entry(&entry->wait,
		    poll_ctx->next_wake_fun);
	}
	add_wait_queue(wqh, &entry->wait);
}

static int
sync_kloop_tx_kick_wake_fun(wait_queue_t *wait, unsigned mode,
    int wake_flags, void *key)
{
	struct sync_kloop_poll_entry *entry =
	    container_of(wait, struct sync_kloop_poll_entry, wait);

	netmap_sync_kloop_tx_ring(entry->args);

	return 0;
}

static int
sync_kloop_tx_irq_wake_fun(wait_queue_t *wait, unsigned mode,
    int wake_flags, void *key)
{
	struct sync_kloop_poll_entry *entry =
	    container_of(wait, struct sync_kloop_poll_entry, wait);
	struct sync_kloop_poll_ctx *poll_ctx = entry->parent;
	int i;

	for (i = 0; i < poll_ctx->num_tx_rings; i++) {
		struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx;

		if (irq_ctx) {
			eventfd_signal(irq_ctx, 1);
		}
	}

	return 0;
}

static int
sync_kloop_rx_kick_wake_fun(wait_queue_t *wait, unsigned mode,
    int wake_flags, void *key)
{
	struct sync_kloop_poll_entry *entry =
	    container_of(wait, struct sync_kloop_poll_entry, wait);

	netmap_sync_kloop_rx_ring(entry->args);

	return 0;
}

static int
sync_kloop_rx_irq_wake_fun(wait_queue_t *wait, unsigned mode,
    int wake_flags, void *key)
{
	struct sync_kloop_poll_entry *entry =
	    container_of(wait, struct sync_kloop_poll_entry, wait);
	struct sync_kloop_poll_ctx *poll_ctx = entry->parent;
	int i;

	for (i = poll_ctx->num_tx_rings; i < poll_ctx->num_rings; i++) {
		struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx;

		if (irq_ctx) {
			eventfd_signal(irq_ctx, 1);
		}
	}

	return 0;
}
#endif  /* SYNC_KLOOP_POLL */

int
netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr)
{
	struct nmreq_sync_kloop_start *req =
		(struct nmreq_sync_kloop_start *)(uintptr_t)hdr->nr_body;
	struct nmreq_opt_sync_kloop_eventfds *eventfds_opt = NULL;
#ifdef SYNC_KLOOP_POLL
	struct sync_kloop_poll_ctx *poll_ctx = NULL;
#endif  /* SYNC_KLOOP_POLL */
	int num_rx_rings, num_tx_rings, num_rings;
	struct sync_kloop_ring_args *args = NULL;
	uint32_t sleep_us = req->sleep_us;
	struct nm_csb_atok* csb_atok_base;
	struct nm_csb_ktoa* csb_ktoa_base;
	struct netmap_adapter *na;
	struct nmreq_option *opt;
	bool na_could_sleep = false;
	bool busy_wait = true;
	bool direct_tx = false;
	bool direct_rx = false;
	int err = 0;
	int i;

	if (sleep_us > 1000000) {
		/* We do not accept sleeping for more than a second. */
		return EINVAL;
	}

	if (priv->np_nifp == NULL) {
		return ENXIO;
	}
	mb(); /* make sure following reads are not from cache */

	na = priv->np_na;
	if (!nm_netmap_on(na)) {
		return ENXIO;
	}

	NMG_LOCK();
	/* Make sure the application is working in CSB mode. */
	if (!priv->np_csb_atok_base || !priv->np_csb_ktoa_base) {
		NMG_UNLOCK();
		nm_prerr("sync-kloop on %s requires "
				"NETMAP_REQ_OPT_CSB option", na->name);
		return EINVAL;
	}

	csb_atok_base = priv->np_csb_atok_base;
	csb_ktoa_base = priv->np_csb_ktoa_base;

	/* Make sure that no kloop is currently running. */
	if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) {
		err = EBUSY;
	}
	priv->np_kloop_state |= NM_SYNC_KLOOP_RUNNING;
	NMG_UNLOCK();
	if (err) {
		return err;
	}

	num_rx_rings = priv->np_qlast[NR_RX] - priv->np_qfirst[NR_RX];
	num_tx_rings = priv->np_qlast[NR_TX] - priv->np_qfirst[NR_TX];
	num_rings = num_tx_rings + num_rx_rings;

	args = nm_os_malloc(num_rings * sizeof(args[0]));
	if (!args) {
		err = ENOMEM;
		goto out;
	}

	/* Prepare the arguments for netmap_sync_kloop_tx_ring()
	 * and netmap_sync_kloop_rx_ring(). */
	for (i = 0; i < num_tx_rings; i++) {
		struct sync_kloop_ring_args *a = args + i;

		a->kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]];
		a->csb_atok = csb_atok_base + i;
		a->csb_ktoa = csb_ktoa_base + i;
		a->busy_wait = busy_wait;
		a->direct = direct_tx;
	}
	for (i = 0; i < num_rx_rings; i++) {
		struct sync_kloop_ring_args *a = args + num_tx_rings + i;

		a->kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]];
		a->csb_atok = csb_atok_base + num_tx_rings + i;
		a->csb_ktoa = csb_ktoa_base + num_tx_rings + i;
		a->busy_wait = busy_wait;
		a->direct = direct_rx;
	}

	/* Validate notification options. */
	opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options,
				NETMAP_REQ_OPT_SYNC_KLOOP_MODE);
	if (opt != NULL) {
		struct nmreq_opt_sync_kloop_mode *mode_opt =
		    (struct nmreq_opt_sync_kloop_mode *)opt;

		direct_tx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_TX);
		direct_rx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_RX);
		if (mode_opt->mode & ~(NM_OPT_SYNC_KLOOP_DIRECT_TX |
		    NM_OPT_SYNC_KLOOP_DIRECT_RX)) {
			opt->nro_status = err = EINVAL;
			goto out;
		}
		opt->nro_status = 0;
	}
	opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options,
				NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS);
	if (opt != NULL) {
		err = nmreq_checkduplicate(opt);
		if (err) {
			opt->nro_status = err;
			goto out;
		}
		if (opt->nro_size != sizeof(*eventfds_opt) +
			sizeof(eventfds_opt->eventfds[0]) * num_rings) {
			/* Option size not consistent with the number of
			 * entries. */
			opt->nro_status = err = EINVAL;
			goto out;
		}
#ifdef SYNC_KLOOP_POLL
		eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt;
		opt->nro_status = 0;

		/* Check if some ioeventfd entry is not defined, and force sleep
		 * synchronization in that case. */
		busy_wait = false;
		for (i = 0; i < num_rings; i++) {
			if (eventfds_opt->eventfds[i].ioeventfd < 0) {
				busy_wait = true;
				break;
			}
		}

		if (busy_wait && (direct_tx || direct_rx)) {
			/* For direct processing we need all the
			 * ioeventfds to be valid. */
			opt->nro_status = err = EINVAL;
			goto out;
		}

		/* We need 2 poll entries for the TX and RX notifications coming
		 * from the netmap adapter, plus one entry per ring for the
		 * notifications coming from the application. */
		poll_ctx = nm_os_malloc(sizeof(*poll_ctx) +
				(num_rings + 2) * sizeof(poll_ctx->entries[0]));
		if (!poll_ctx) {
			err = ENOMEM;
			goto out;
		}
		init_poll_funcptr(&poll_ctx->wait_table,
					sync_kloop_poll_table_queue_proc);
		poll_ctx->num_entries = 2 + num_rings;
		poll_ctx->num_tx_rings = num_tx_rings;
		poll_ctx->num_rings = num_rings;
		poll_ctx->next_entry = 0;
		poll_ctx->next_wake_fun = NULL;

		if (direct_tx && (na->na_flags & NAF_BDG_MAYSLEEP)) {
			/* In direct mode, VALE txsync is called from
			 * wake-up context, where it is not possible
			 * to sleep.
			 */
			na->na_flags &= ~NAF_BDG_MAYSLEEP;
			na_could_sleep = true;
		}

		/* Only the first num_rings entries have per-ring arguments;
		 * the last two entries (TX irq and RX irq) do not use them. */
		for (i = 0; i < num_rings + 2; i++) {
			poll_ctx->entries[i].args = (i < num_rings) ?
			    args + i : NULL;
			poll_ctx->entries[i].parent = poll_ctx;
		}

		/* Poll for notifications coming from the application through
		 * eventfds. */
		for (i = 0; i < num_rings; i++, poll_ctx->next_entry++) {
			struct eventfd_ctx *irq = NULL;
			struct file *filp = NULL;
			unsigned long mask;
			bool tx_ring = (i < num_tx_rings);

			if (eventfds_opt->eventfds[i].irqfd >= 0) {
				filp = eventfd_fget(
				    eventfds_opt->eventfds[i].irqfd);
				if (IS_ERR(filp)) {
					err = PTR_ERR(filp);
					goto out;
				}
				irq = eventfd_ctx_fileget(filp);
				if (IS_ERR(irq)) {
					err = PTR_ERR(irq);
					goto out;
				}
			}
			poll_ctx->entries[i].irq_filp = filp;
			poll_ctx->entries[i].irq_ctx = irq;
			poll_ctx->entries[i].args->busy_wait = busy_wait;
			/* Don't let netmap_sync_kloop_*x_ring() use
			 * IRQs in direct mode. */
			poll_ctx->entries[i].args->irq_ctx =
			    ((tx_ring && direct_tx) ||
			    (!tx_ring && direct_rx)) ? NULL :
			    poll_ctx->entries[i].irq_ctx;
			poll_ctx->entries[i].args->direct =
			    (tx_ring ? direct_tx : direct_rx);

			if (!busy_wait) {
				filp = eventfd_fget(
				    eventfds_opt->eventfds[i].ioeventfd);
				if (IS_ERR(filp)) {
					err = PTR_ERR(filp);
					goto out;
				}
				if (tx_ring && direct_tx) {
					/* Override the wake up function
					 * so that it can directly call
					 * netmap_sync_kloop_tx_ring().
					 */
					poll_ctx->next_wake_fun =
					    sync_kloop_tx_kick_wake_fun;
				} else if (!tx_ring && direct_rx) {
					/* Same for direct RX. */
					poll_ctx->next_wake_fun =
					    sync_kloop_rx_kick_wake_fun;
				} else {
					poll_ctx->next_wake_fun = NULL;
				}
				mask = filp->f_op->poll(filp,
				    &poll_ctx->wait_table);
				if (mask & POLLERR) {
					err = EINVAL;
					goto out;
				}
			}
		}

		/* Poll for notifications coming from the netmap rings bound to
		 * this file descriptor. */
		if (!busy_wait) {
			NMG_LOCK();
			/* In direct mode, override the wake up function so
			 * that it can forward the netmap_tx_irq() to the
			 * guest. */
			poll_ctx->next_wake_fun = direct_tx ?
			    sync_kloop_tx_irq_wake_fun : NULL;
			poll_wait(priv->np_filp, priv->np_si[NR_TX],
			    &poll_ctx->wait_table);
			poll_ctx->next_entry++;

			poll_ctx->next_wake_fun = direct_rx ?
			    sync_kloop_rx_irq_wake_fun : NULL;
			poll_wait(priv->np_filp, priv->np_si[NR_RX],
			    &poll_ctx->wait_table);
			poll_ctx->next_entry++;
			NMG_UNLOCK();
		}
#else   /* SYNC_KLOOP_POLL */
		opt->nro_status = err = EOPNOTSUPP;
		goto out;
#endif  /* SYNC_KLOOP_POLL */
	}

	nm_prinf("kloop busy_wait %u, direct_tx %u, direct_rx %u, "
	    "na_could_sleep %u", busy_wait, direct_tx, direct_rx,
	    na_could_sleep);

	/* Main loop. */
	for (;;) {
		if (unlikely(NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_STOPPING)) {
			break;
		}

#ifdef SYNC_KLOOP_POLL
		if (!busy_wait) {
			/* It is important to set the task state as
			 * interruptible before processing any TX/RX ring,
			 * so that if a notification on ring Y comes after
			 * we have processed ring Y, but before we call
			 * schedule(), we don't miss it. This is true because
			 * the wake up function will change the task state,
			 * and therefore the schedule_timeout() call below
			 * will observe the change.
			 */
			set_current_state(TASK_INTERRUPTIBLE);
		}
#endif  /* SYNC_KLOOP_POLL */

		/* Process all the TX rings bound to this file descriptor. */
		for (i = 0; !direct_tx && i < num_tx_rings; i++) {
			struct sync_kloop_ring_args *a = args + i;
			netmap_sync_kloop_tx_ring(a);
		}

		/* Process all the RX rings bound to this file descriptor. */
		for (i = 0; !direct_rx && i < num_rx_rings; i++) {
			struct sync_kloop_ring_args *a = args + num_tx_rings + i;
			netmap_sync_kloop_rx_ring(a);
		}

		if (busy_wait) {
			/* Default synchronization method: sleep for a while. */
			usleep_range(sleep_us, sleep_us);
		}
#ifdef SYNC_KLOOP_POLL
		else {
			/* Yield to the scheduler, waiting for a notification
			 * to come either from netmap or from the application. */
			schedule_timeout(msecs_to_jiffies(3000));
		}
#endif /* SYNC_KLOOP_POLL */
	}
out:
#ifdef SYNC_KLOOP_POLL
	if (poll_ctx) {
		/* Stop polling from netmap and the eventfds, and deallocate
		 * the poll context. */
		if (!busy_wait) {
			__set_current_state(TASK_RUNNING);
		}
		for (i = 0; i < poll_ctx->next_entry; i++) {
			struct sync_kloop_poll_entry *entry =
						poll_ctx->entries + i;

			if (entry->wqh)
				remove_wait_queue(entry->wqh, &entry->wait);
			/* Release the reference we took on the eventfd files,
			 * but not on the netmap file descriptor, since no
			 * reference was taken on it. */
			if (entry->filp && entry->filp != priv->np_filp)
				fput(entry->filp);
			if (entry->irq_ctx)
				eventfd_ctx_put(entry->irq_ctx);
			if (entry->irq_filp)
				fput(entry->irq_filp);
		}
		nm_os_free(poll_ctx);
		poll_ctx = NULL;
	}
#endif /* SYNC_KLOOP_POLL */

	if (args) {
		nm_os_free(args);
		args = NULL;
	}

	/* Reset the kloop state. */
	NMG_LOCK();
	priv->np_kloop_state = 0;
	if (na_could_sleep) {
		na->na_flags |= NAF_BDG_MAYSLEEP;
	}
	NMG_UNLOCK();

	return err;
}

int
netmap_sync_kloop_stop(struct netmap_priv_d *priv)
{
	struct netmap_adapter *na;
	bool running = true;
	int err = 0;

	if (priv->np_nifp == NULL) {
		return ENXIO;
	}
	mb(); /* make sure following reads are not from cache */

	na = priv->np_na;
	if (!nm_netmap_on(na)) {
		return ENXIO;
	}

	/* Set the kloop stopping flag. */
	NMG_LOCK();
	priv->np_kloop_state |= NM_SYNC_KLOOP_STOPPING;
	NMG_UNLOCK();

	/* Send a notification to the kloop, in case it is blocked in
	 * schedule_timeout(). We can use either RX or TX, because the
	 * kloop is waiting on both. */
	nm_os_selwakeup(priv->np_si[NR_RX]);

	/* Wait for the kloop to actually terminate. */
	while (running) {
		usleep_range(1000, 1500);
		NMG_LOCK();
		running = (NM_ACCESS_ONCE(priv->np_kloop_state)
				& NM_SYNC_KLOOP_RUNNING);
		NMG_UNLOCK();
	}

	return err;
}

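/*
 * Typical usage from user space (sketch, not part of this file): after
 * binding a port in CSB mode (NETMAP_REQ_REGISTER plus a NETMAP_REQ_OPT_CSB
 * option), the application issues a NETMAP_REQ_SYNC_KLOOP_START request from
 * a dedicated thread, optionally attaching NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS
 * and NETMAP_REQ_OPT_SYNC_KLOOP_MODE options, and later unblocks that thread
 * with a NETMAP_REQ_SYNC_KLOOP_STOP request issued from another thread. The
 * exact ioctl plumbing depends on the user-space library in use.
 */
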
#ifdef WITH_PTNETMAP
/*
 * Guest ptnetmap txsync()/rxsync() routines, used in ptnet device drivers.
 * These routines are reused across the different operating systems supported
 * by netmap.
 */

/*
 * Reconcile host and guest views of the transmit ring.
 *
 * The guest user wants to transmit packets up to the one before ring->head,
 * and the guest kernel knows that tx_ring->hwcur is the first packet not yet
 * sent out by the host kernel.
 *
 * We push out as many packets as possible, and possibly
 * reclaim buffers from previously completed transmissions.
 *
 * Notifications from the host are enabled only if the guest user would
 * block (no space in the ring).
 */
bool
netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
			struct netmap_kring *kring, int flags)
{
	bool notify = false;

	/* Disable notifications */
	atok->appl_need_kick = 0;

	/*
	 * First part: tell the host to process the new packets,
	 * updating the CSB.
	 */
	kring->nr_hwcur = ktoa->hwcur;
	nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead);

	/* Ask for a kick from the guest to the host if needed. */
	if (((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring))
		&& NM_ACCESS_ONCE(ktoa->kern_need_kick)) ||
			(flags & NAF_FORCE_RECLAIM)) {
		atok->sync_flags = flags;
		notify = true;
	}

	/*
	 * Second part: reclaim buffers for completed transmissions.
	 */
	if (nm_kr_wouldblock(kring) || (flags & NAF_FORCE_RECLAIM)) {
		nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
					&kring->nr_hwcur);
	}

	/*
	 * No more room in the ring for new transmissions. The user thread will
	 * go to sleep and we need to be notified by the host when more free
	 * space is available.
	 */
	if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
		/* Reenable notifications. */
		atok->appl_need_kick = 1;
		/* Double check, with store-load memory barrier. */
		nm_stld_barrier();
		nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
					&kring->nr_hwcur);
		/* If there is new free space, disable notifications. */
		if (unlikely(!nm_kr_wouldblock(kring))) {
			atok->appl_need_kick = 0;
		}
	}

	nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
		kring->name, atok->head, atok->cur, ktoa->hwtail,
		kring->rhead, kring->rcur, kring->nr_hwtail);

	return notify;
}

/*
 * Reconcile host and guest views of the receive ring.
 *
 * Update hwcur/hwtail from the host (reading from the CSB).
 *
 * If the guest user has released buffers up to the one before ring->head, we
 * also give them to the host.
 *
 * Notifications from the host are enabled only if the guest user would
 * block (no more completed slots in the ring).
 */
bool
netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
			struct netmap_kring *kring, int flags)
{
	bool notify = false;

	/* Disable notifications */
	atok->appl_need_kick = 0;

	/*
	 * First part: import newly received packets, by updating the kring
	 * hwtail to the hwtail known from the host (read from the CSB).
	 * This also updates the kring hwcur.
	 */
	nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur);
	kring->nr_kflags &= ~NKR_PENDINTR;

	/*
	 * Second part: tell the host about the slots that the guest user has
	 * released, by updating cur and head in the CSB.
	 */
	if (kring->rhead != kring->nr_hwcur) {
		nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead);
	}

	/*
	 * No more completed RX slots. The user thread will go to sleep and
	 * we need to be notified by the host when more RX slots have been
	 * completed.
	 */
	if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
		/* Reenable notifications. */
		atok->appl_need_kick = 1;
		/* Double check, with store-load memory barrier. */
		nm_stld_barrier();
		nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
					&kring->nr_hwcur);
		/* If there are new slots, disable notifications. */
		if (!nm_kr_wouldblock(kring)) {
			atok->appl_need_kick = 0;
		}
	}

	/* Ask for a kick from the guest to the host if needed. */
	if ((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring))
		&& NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
		atok->sync_flags = flags;
		notify = true;
	}

	nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
		kring->name, atok->head, atok->cur, ktoa->hwtail,
		kring->rhead, kring->rcur, kring->nr_hwtail);

	return notify;
}

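/*
 * Note: the kern_need_kick flag tested by the two routines above is the same
 * flag that the sync kloop toggles through csb_ktoa_kick_enable(), so the
 * guest only kicks the host when application --> kernel notifications are
 * currently enabled on that ring.
 */
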
/*
 * Callbacks for ptnet drivers: nm_krings_create, nm_krings_delete, nm_dtor.
 */
int
ptnet_nm_krings_create(struct netmap_adapter *na)
{
	struct netmap_pt_guest_adapter *ptna =
			(struct netmap_pt_guest_adapter *)na; /* Upcast. */
	struct netmap_adapter *na_nm = &ptna->hwup.up;
	struct netmap_adapter *na_dr = &ptna->dr.up;
	int ret;

	if (ptna->backend_users) {
		return 0;
	}

	/* Create krings on the public netmap adapter. */
	ret = netmap_hw_krings_create(na_nm);
	if (ret) {
		return ret;
	}

	/* Copy krings into the netmap adapter private to the driver. */
	na_dr->tx_rings = na_nm->tx_rings;
	na_dr->rx_rings = na_nm->rx_rings;

	return 0;
}

void
ptnet_nm_krings_delete(struct netmap_adapter *na)
{
	struct netmap_pt_guest_adapter *ptna =
			(struct netmap_pt_guest_adapter *)na; /* Upcast. */
	struct netmap_adapter *na_nm = &ptna->hwup.up;
	struct netmap_adapter *na_dr = &ptna->dr.up;

	if (ptna->backend_users) {
		return;
	}

	na_dr->tx_rings = NULL;
	na_dr->rx_rings = NULL;

	netmap_hw_krings_delete(na_nm);
}

void
ptnet_nm_dtor(struct netmap_adapter *na)
{
	struct netmap_pt_guest_adapter *ptna =
			(struct netmap_pt_guest_adapter *)na;

	netmap_mem_put(ptna->dr.up.nm_mem);
	memset(&ptna->dr, 0, sizeof(ptna->dr));
	netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp);
}

int
netmap_pt_guest_attach(struct netmap_adapter *arg,
		       unsigned int nifp_offset, unsigned int memid)
{
	struct netmap_pt_guest_adapter *ptna;
	struct ifnet *ifp = arg ? arg->ifp : NULL;
	int error;

	/* get allocator */
	arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid);
	if (arg->nm_mem == NULL)
		return ENOMEM;
	arg->na_flags |= NAF_MEM_OWNER;
	error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1);
	if (error)
		return error;

	/* get the netmap_pt_guest_adapter */
	ptna = (struct netmap_pt_guest_adapter *) NA(ifp);

	/* Initialize a separate pass-through netmap adapter that is going to
	 * be used by the ptnet driver only, and so never exposed to netmap
	 * applications. We only need a subset of the available fields. */
	memset(&ptna->dr, 0, sizeof(ptna->dr));
	ptna->dr.up.ifp = ifp;
	ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem);
	ptna->dr.up.nm_config = ptna->hwup.up.nm_config;

	ptna->backend_users = 0;

	return 0;
}

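/*
 * Sketch of how a ptnet guest driver is expected to plug in the helpers
 * above (the surrounding driver code is hypothetical; the callback fields
 * are the standard ones of struct netmap_adapter):
 *
 *	na.nm_krings_create = ptnet_nm_krings_create;
 *	na.nm_krings_delete = ptnet_nm_krings_delete;
 *	na.nm_dtor = ptnet_nm_dtor;
 *	...
 *	netmap_pt_guest_attach(&na, nifp_offset, memid);
 *
 * with the driver's nm_txsync/nm_rxsync callbacks forwarding to
 * netmap_pt_guest_txsync()/netmap_pt_guest_rxsync() using the per-ring CSB
 * pointers.
 */
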
#endif /* WITH_PTNETMAP */