1/*
2 * Copyright (C) 2014-2016 Giuseppe Lettieri
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *   1. Redistributions of source code must retain the above copyright
9 *      notice, this list of conditions and the following disclaimer.
10 *   2. Redistributions in binary form must reproduce the above copyright
11 *      notice, this list of conditions and the following disclaimer in the
12 *      documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27/*
28 *
29 * Monitors
30 *
31 * netmap monitors can be used to do monitoring of network traffic
32 * on another adapter, when the latter adapter is working in netmap mode.
33 *
34 * Monitors offer to userspace the same interface as any other netmap port,
35 * with as many pairs of netmap rings as the monitored adapter.
36 * However, only the rx rings are actually used. Each monitor rx ring receives
37 * the traffic transiting on both the tx and rx corresponding rings in the
38 * monitored adapter. During registration, the user can choose if she wants
39 * to intercept tx only, rx only, or both tx and rx traffic.
40 * The slots containing traffic intercepted in the tx direction will have
41 * the NS_TXMON flag set.
42 *
43 * If the monitor is not able to cope with the stream of frames, excess traffic
44 * will be dropped.
45 *
46 * If the monitored adapter leaves netmap mode, the monitor has to be restarted.
47 *
48 * Monitors can be either zero-copy or copy-based.
49 *
50 * Copy monitors see the frames before they are consumed:
51 *
52 *  - For tx traffic, this is when the application sends them, before they are
53 *    passed down to the adapter.
54 *
55 *  - For rx traffic, this is when they are received by the adapter, before
56 *    they are sent up to the application, if any (note that, if no
57 *    application is reading from a monitored ring, the ring will eventually
58 *    fill up and traffic will stop).
59 *
60 * Zero-copy monitors only see the frames after they have been consumed:
61 *
62 *  - For tx traffic, this is after the slots containing the frames have been
 *    marked as free. Note that this may happen at a considerable delay after
64 *    frame transmission, since freeing of slots is often done lazily.
65 *
66 *  - For rx traffic, this is after the consumer on the monitored adapter
67 *    has released them. In most cases, the consumer is a userspace
68 *    application which may have modified the frame contents.
69 *
70 * Several copy or zero-copy monitors may be active on any ring.
71 *
72 */
73
74
75#if defined(__FreeBSD__)
76#include <sys/cdefs.h> /* prerequisite */
77
78#include <sys/types.h>
79#include <sys/errno.h>
80#include <sys/param.h>	/* defines used in kernel.h */
81#include <sys/kernel.h>	/* types used in module initialization */
82#include <sys/malloc.h>
83#include <sys/poll.h>
84#include <sys/lock.h>
85#include <sys/rwlock.h>
86#include <sys/selinfo.h>
87#include <sys/sysctl.h>
88#include <sys/socket.h> /* sockaddrs */
89#include <net/if.h>
90#include <net/if_var.h>
91#include <machine/bus.h>	/* bus_dmamap_* */
92#include <sys/refcount.h>
93
94
95#elif defined(linux)
96
97#include "bsd_glue.h"
98
99#elif defined(__APPLE__)
100
101#warning OSX support is only partial
102#include "osx_glue.h"
103
104#elif defined(_WIN32)
105#include "win_glue.h"
106#else
107
108#error	Unsupported platform
109
110#endif /* unsupported */
111
112/*
113 * common headers
114 */
115
116#include <net/netmap.h>
117#include <dev/netmap/netmap_kern.h>
118#include <dev/netmap/netmap_mem2.h>
119
120#ifdef WITH_MONITOR
121
/* Limit passed to nm_bound_var() for the tx/rx slot counts requested
 * for a monitor (see netmap_get_monitor_na).
 */
#define NM_MONITOR_MAXSLOTS 4096
123
124/*
125 ********************************************************************
 * functions common to both kinds of monitors
127 ********************************************************************
128 */
129
130static int netmap_zmon_reg(struct netmap_adapter *, int);
131static int
132nm_is_zmon(struct netmap_adapter *na)
133{
134	return na->nm_register == netmap_zmon_reg;
135}
136
137/* nm_sync callback for the monitor's own tx rings.
138 * This makes no sense and always returns error
139 */
140static int
141netmap_monitor_txsync(struct netmap_kring *kring, int flags)
142{
143	nm_prlim(1, "%s %x", kring->name, flags);
144	return EIO;
145}
146
147/* nm_sync callback for the monitor's own rx rings.
148 * Note that the lock in netmap_zmon_parent_sync only protects
149 * writers among themselves. Synchronization between writers
150 * (i.e., netmap_zmon_parent_txsync and netmap_zmon_parent_rxsync)
151 * and readers (i.e., netmap_zmon_rxsync) relies on memory barriers.
152 */
153static int
154netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
155{
156	struct netmap_monitor_adapter *mna =
157		(struct netmap_monitor_adapter *)kring->na;
158	if (unlikely(mna->priv.np_na == NULL)) {
159		/* parent left netmap mode */
160		return EIO;
161	}
162	nm_prdis("%s %x", kring->name, flags);
163	kring->nr_hwcur = kring->rhead;
164	mb();
165	return 0;
166}
167
168/* nm_krings_create callbacks for monitors.
169 */
170static int
171netmap_monitor_krings_create(struct netmap_adapter *na)
172{
173	int error = netmap_krings_create(na, 0);
174	enum txrx t;
175
176	if (error)
177		return error;
178	/* override the host rings callbacks */
179	for_rx_tx(t) {
180		int i;
181		u_int first = nma_get_nrings(na, t);
182		for (i = 0; i < nma_get_host_nrings(na, t); i++) {
183			struct netmap_kring *kring = NMR(na, t)[first + i];
184			kring->nm_sync = t == NR_TX ? netmap_monitor_txsync :
185						      netmap_monitor_rxsync;
186		}
187	}
188	return 0;
189}
190
/* nm_krings_delete callback for monitors.
 * Monitors need no extra teardown of their own here: the work is
 * entirely delegated to netmap_krings_delete().
 */
static void
netmap_monitor_krings_delete(struct netmap_adapter *na)
{
	netmap_krings_delete(na);
}
197
198
199static u_int
200nm_txrx2flag(enum txrx t)
201{
202	return (t == NR_RX ? NR_MONITOR_RX : NR_MONITOR_TX);
203}
204
205/* allocate the monitors array in the monitored kring */
206static int
207nm_monitor_alloc(struct netmap_kring *kring, u_int n)
208{
209	size_t old_len, len;
210	struct netmap_kring **nm;
211
212	if (n <= kring->max_monitors)
213		/* we already have more entries that requested */
214		return 0;
215
216	old_len = sizeof(struct netmap_kring *)*kring->max_monitors;
217	len = sizeof(struct netmap_kring *) * n;
218	nm = nm_os_realloc(kring->monitors, len, old_len);
219	if (nm == NULL)
220		return ENOMEM;
221
222	kring->monitors = nm;
223	kring->max_monitors = n;
224
225	return 0;
226}
227
228/* deallocate the parent array in the parent adapter */
229static void
230nm_monitor_dealloc(struct netmap_kring *kring)
231{
232	if (kring->monitors) {
233		if (kring->n_monitors > 0) {
234			nm_prerr("freeing not empty monitor array for %s (%d dangling monitors)!",
235			    kring->name, kring->n_monitors);
236		}
237		nm_os_free(kring->monitors);
238		kring->monitors = NULL;
239		kring->max_monitors = 0;
240		kring->n_monitors = 0;
241	}
242}
243
244/* returns 1 iff kring has no monitors */
245static inline int
246nm_monitor_none(struct netmap_kring *kring)
247{
248	return kring->n_monitors == 0 &&
249		kring->zmon_list[NR_TX].next == NULL &&
250		kring->zmon_list[NR_RX].next == NULL;
251}
252
/*
 * monitors work by replacing the nm_sync() and possibly the
 * nm_notify() callbacks in the monitored rings.
 *
 * The replacement callbacks are declared here because the
 * intercept/restore helpers below refer to them before their
 * definitions.
 */
static int netmap_zmon_parent_txsync(struct netmap_kring *, int);
static int netmap_zmon_parent_rxsync(struct netmap_kring *, int);
static int netmap_monitor_parent_txsync(struct netmap_kring *, int);
static int netmap_monitor_parent_rxsync(struct netmap_kring *, int);
static int netmap_monitor_parent_notify(struct netmap_kring *, int);
262
/* Do-nothing sync callback: ignores its arguments and reports success.
 * Used as the saved "original" callback for monitored krings that had
 * no nm_sync of their own.
 */
static int
nm_monitor_dummycb(struct netmap_kring *kring, int flags)
{
	/* both arguments are intentionally unused */
	(void)flags;
	(void)kring;
	return 0;
}
270
271static void
272nm_monitor_intercept_callbacks(struct netmap_kring *kring)
273{
274	nm_prdis("intercept callbacks on %s", kring->name);
275	kring->mon_sync = kring->nm_sync != NULL ?
276		kring->nm_sync : nm_monitor_dummycb;
277	kring->mon_notify = kring->nm_notify;
278	if (kring->tx == NR_TX) {
279		kring->nm_sync = netmap_monitor_parent_txsync;
280	} else {
281		kring->nm_sync = netmap_monitor_parent_rxsync;
282		kring->nm_notify = netmap_monitor_parent_notify;
283		kring->mon_tail = kring->nr_hwtail;
284	}
285}
286
287static void
288nm_monitor_restore_callbacks(struct netmap_kring *kring)
289{
290	nm_prdis("restoring callbacks on %s", kring->name);
291	kring->nm_sync = kring->mon_sync;
292	kring->mon_sync = NULL;
293	if (kring->tx == NR_RX) {
294		kring->nm_notify = kring->mon_notify;
295	}
296	kring->mon_notify = NULL;
297}
298
299static struct netmap_kring *
300nm_zmon_list_head(struct netmap_kring *mkring, enum txrx t)
301{
302	struct netmap_adapter *na = mkring->na;
303	struct netmap_kring *kring = mkring;
304	struct netmap_zmon_list *z = &kring->zmon_list[t];
305	/* reach the head of the list */
306	while (nm_is_zmon(na) && z->prev != NULL) {
307		kring = z->prev;
308		na = kring->na;
309		z = &kring->zmon_list[t];
310	}
311	return nm_is_zmon(na) ? NULL : kring;
312}
313
314/* add the monitor mkring to the list of monitors of kring.
315 * If this is the first monitor, intercept the callbacks
316 */
317static int
318netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zmon)
319{
320	int error = NM_IRQ_COMPLETED;
321	enum txrx t = kring->tx;
322	struct netmap_zmon_list *z = &kring->zmon_list[t];
323	struct netmap_zmon_list *mz = &mkring->zmon_list[t];
324	struct netmap_kring *ikring = kring;
325
326	/* a zero-copy monitor which is not the first in the list
327	 * must monitor the previous monitor
328	 */
329	if (zmon && z->prev != NULL)
330		ikring = z->prev; /* tail of the list */
331
332	/* synchronize with concurrently running nm_sync()s */
333	nm_kr_stop(kring, NM_KR_LOCKED);
334
335	if (nm_monitor_none(ikring)) {
336		/* this is the first monitor, intercept the callbacks */
337		nm_prdis("%s: intercept callbacks on %s", mkring->name, ikring->name);
338		nm_monitor_intercept_callbacks(ikring);
339	}
340
341	if (zmon) {
342		/* append the zmon to the list */
343		ikring->zmon_list[t].next = mkring;
344		z->prev = mkring; /* new tail */
345		mz->prev = ikring;
346		mz->next = NULL;
347		/* grab a reference to the previous netmap adapter
348		 * in the chain (this may be the monitored port
349		 * or another zero-copy monitor)
350		 */
351		netmap_adapter_get(ikring->na);
352	} else {
353		/* make sure the monitor array exists and is big enough */
354		error = nm_monitor_alloc(kring, kring->n_monitors + 1);
355		if (error)
356			goto out;
357		kring->monitors[kring->n_monitors] = mkring;
358		mkring->mon_pos[kring->tx] = kring->n_monitors;
359		kring->n_monitors++;
360	}
361
362out:
363	nm_kr_start(kring);
364	return error;
365}
366
/* remove the monitor mkring from the list of monitors of kring.
 * If this is the last monitor, restore the original callbacks.
 *
 * mkring: the monitor's own rx kring being detached
 * kring:  the monitored kring; used as passed for copy monitors, while
 *         for zero-copy monitors it is recomputed as the head of the
 *         zero-copy list (and may turn out to be NULL)
 * t:      which zmon_list[] of mkring to unlink from
 */
static void
netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring, enum txrx t)
{
	int zmon = nm_is_zmon(mkring->na);
	struct netmap_zmon_list *mz = &mkring->zmon_list[t];
	/* ikring is the ring whose callbacks may have to be restored:
	 * the monitored ring itself for copy monitors, our predecessor
	 * in the list for zero-copy monitors
	 */
	struct netmap_kring *ikring = kring;


	if (zmon) {
		/* get to the head of the list */
		kring = nm_zmon_list_head(mkring, t);
		ikring = mz->prev;
	}

	/* synchronize with concurrently running nm_sync()s
	 * if kring is NULL (orphaned list) the monitored port
	 * has exited netmap mode, so there is nothing to stop
	 */
	if (kring != NULL)
		nm_kr_stop(kring, NM_KR_LOCKED);

	if (zmon) {
		/* remove the monitor from the list */
		if (mz->next != NULL) {
			mz->next->zmon_list[t].prev = mz->prev;
			/* we also need to let the next monitor drop the
			 * reference to us and grab the reference to the
			 * previous ring owner, instead
			 */
			if (mz->prev != NULL)
				netmap_adapter_get(mz->prev->na);
			netmap_adapter_put(mkring->na);
		} else if (kring != NULL) {
			/* in the monitored kring, prev is actually the
			 * pointer to the tail of the list
			 */
			kring->zmon_list[t].prev =
				(mz->prev != kring ? mz->prev : NULL);
		}
		if (mz->prev != NULL) {
			/* drop our own reference to the predecessor's adapter
			 * and make the predecessor skip over us
			 */
			netmap_adapter_put(mz->prev->na);
			mz->prev->zmon_list[t].next = mz->next;
		}
		mz->prev = NULL;
		mz->next = NULL;
	} else {
		/* this is a copy monitor */
		uint32_t mon_pos = mkring->mon_pos[kring->tx];
		kring->n_monitors--;
		/* keep the array compact: move the last entry into the
		 * hole left by the removed monitor and update its position
		 */
		if (mon_pos != kring->n_monitors) {
			kring->monitors[mon_pos] =
				kring->monitors[kring->n_monitors];
			kring->monitors[mon_pos]->mon_pos[kring->tx] = mon_pos;
		}
		kring->monitors[kring->n_monitors] = NULL;
		if (kring->n_monitors == 0) {
			nm_monitor_dealloc(kring);
		}
	}

	if (ikring != NULL && nm_monitor_none(ikring)) {
		/* this was the last monitor, restore the callbacks */
		nm_monitor_restore_callbacks(ikring);
	}

	if (kring != NULL)
		nm_kr_start(kring);
}
438
439
/* This is called when the monitored adapter leaves netmap mode
 * (see netmap_do_unregif).
 * We need to notify the monitors that the monitored rings are gone.
 * We do this by setting their mna->priv.np_na to NULL.
 * Note that the rings are already stopped when this happens, so
 * no monitor ring callback can be active.
 *
 * For every kring of na that has monitors, this detaches all copy
 * monitors, orphans the zero-copy list (if na is not itself a
 * zero-copy monitor) and restores the original callbacks.
 */
void
netmap_monitor_stop(struct netmap_adapter *na)
{
	enum txrx t;

	for_rx_tx(t) {
		u_int i;

		for (i = 0; i < netmap_all_rings(na, t); i++) {
			struct netmap_kring *kring = NMR(na, t)[i];
			struct netmap_zmon_list *z = &kring->zmon_list[t];
			u_int j;

			if (nm_monitor_none(kring))
				continue;

			/* copy monitors: drop their reference to the parent
			 * and empty the monitors array
			 */
			for (j = 0; j < kring->n_monitors; j++) {
				struct netmap_kring *mkring =
					kring->monitors[j];
				struct netmap_monitor_adapter *mna =
					(struct netmap_monitor_adapter *)mkring->na;
				/* forget about this adapter */
				if (mna->priv.np_na != NULL) {
					netmap_adapter_put(mna->priv.np_na);
					mna->priv.np_na = NULL;
				}
				kring->monitors[j] = NULL;
			}
			kring->n_monitors = 0;
			nm_monitor_dealloc(kring);

			if (!nm_is_zmon(na)) {
				/* we are the head of at most one list */
				struct netmap_kring *zkring;
				for (zkring = z->next; zkring != NULL;
						zkring = zkring->zmon_list[t].next)
				{
					struct netmap_monitor_adapter *next =
						(struct netmap_monitor_adapter *)zkring->na;
					/* let the monitor forget about us */
					netmap_adapter_put(next->priv.np_na); /* nop if null */
					next->priv.np_na = NULL;
					/* drop the additional ref taken in netmap_monitor_add() */
					netmap_adapter_put(zkring->zmon_list[t].prev->na);
				}
				/* orphan the zmon list */
				if (z->next != NULL)
					z->next->zmon_list[t].prev = NULL;
				z->next = NULL;
				z->prev = NULL;
			}

			/* no monitors are left on this kring */
			nm_monitor_restore_callbacks(kring);
		}
	}
}
503
504
/* common function for the nm_register() callbacks of both kinds of
 * monitors.
 *
 * na:    the monitor adapter being switched on or off
 * onoff: non-zero to enter netmap mode, zero to leave it
 * zmon:  non-zero for zero-copy monitors; forwarded to netmap_monitor_add()
 *
 * Returns 0, or ENXIO if the monitor is being switched on after its
 * parent has left netmap mode.
 */
static int
netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
{
	struct netmap_monitor_adapter *mna =
		(struct netmap_monitor_adapter *)na;
	struct netmap_priv_d *priv = &mna->priv;
	struct netmap_adapter *pna = priv->np_na;
	struct netmap_kring *kring, *mkring;
	int i;
	enum txrx t, s;

	nm_prdis("%p: onoff %d", na, onoff);
	if (onoff) {
		if (pna == NULL) {
			/* parent left netmap mode, fatal */
			nm_prerr("%s: parent left netmap mode", na->name);
			return ENXIO;
		}
		for_rx_tx(t) {
			for (i = 0; i < netmap_all_rings(na, t); i++) {
				mkring = NMR(na, t)[i];
				if (!nm_kring_pending_on(mkring))
					continue;
				mkring->nr_mode = NKR_NETMAP_ON;
				/* only the rx rings of the monitor are
				 * attached to the parent rings
				 */
				if (t == NR_TX)
					continue;
				/* attach monitor rx ring i to parent ring i in
				 * each of the requested directions
				 */
				for_rx_tx(s) {
					if (i > nma_get_nrings(pna, s))
						continue;
					if (mna->flags & nm_txrx2flag(s)) {
						kring = NMR(pna, s)[i];
						/* NOTE(review): the return value is
						 * ignored, so a failed add goes
						 * unreported here; confirm this is
						 * intended
						 */
						netmap_monitor_add(mkring, kring, zmon);
					}
				}
			}
		}
		na->na_flags |= NAF_NETMAP_ON;
	} else {
		if (na->active_fds == 0)
			na->na_flags &= ~NAF_NETMAP_ON;
		for_rx_tx(t) {
			for (i = 0; i < netmap_all_rings(na, t); i++) {
				mkring = NMR(na, t)[i];
				if (!nm_kring_pending_off(mkring))
					continue;
				mkring->nr_mode = NKR_NETMAP_OFF;
				if (t == NR_TX)
					continue;
				/* we cannot access the parent krings if the parent
				 * has left netmap mode. This is signaled by a NULL
				 * pna pointer
				 */
				if (pna == NULL)
					continue;
				/* mirror of the attach loop above */
				for_rx_tx(s) {
					if (i > nma_get_nrings(pna, s))
						continue;
					if (mna->flags & nm_txrx2flag(s)) {
						kring = NMR(pna, s)[i];
						netmap_monitor_del(mkring, kring, s);
					}
				}
			}
		}
	}
	return 0;
}
575
576/*
577 ****************************************************************
578 * functions specific for zero-copy monitors
579 ****************************************************************
580 */
581
/*
 * Common function for both zero-copy tx and rx nm_sync()
 * callbacks.
 *
 * Hands the slots released on the monitored kring over to the first
 * zero-copy monitor in zmon_list[tx] by swapping buffer indices and
 * lengths between the two rings, then notifies the monitor. The
 * original sync callback (kring->mon_sync) is called before the swap
 * for tx rings and after it for rx rings.
 *
 * Returns 0 without doing anything if there is no monitor in the
 * list; otherwise returns the value of kring->mon_sync.
 */
static int
netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
{
	struct netmap_kring *mkring = kring->zmon_list[tx].next;
	struct netmap_ring *ring = kring->ring, *mring;
	int error = 0;
	int rel_slots, free_slots, busy, sent = 0;
	u_int beg, end, i;
	u_int lim = kring->nkr_num_slots - 1,
	      mlim; // = mkring->nkr_num_slots - 1;
	uint16_t txmon = kring->tx == NR_TX ? NS_TXMON : 0;

	if (mkring == NULL) {
		nm_prlim(5, "NULL monitor on %s", kring->name);
		return 0;
	}
	mring = mkring->ring;
	mlim = mkring->nkr_num_slots - 1;

	/* get the released slots (rel_slots) */
	if (tx == NR_TX) {
		/* the released slots are those between the tail before
		 * and the tail after the original txsync
		 */
		beg = kring->nr_hwtail + 1;
		error = kring->mon_sync(kring, flags);
		if (error)
			return error;
		end = kring->nr_hwtail + 1;
	} else { /* NR_RX */
		beg = kring->nr_hwcur;
		end = kring->rhead;
	}

	rel_slots = end - beg;
	if (rel_slots < 0)
		rel_slots += kring->nkr_num_slots;

	if (!rel_slots) {
		/* no released slots, but we still need
		 * to call rxsync if this is a rx ring
		 */
		goto out_rxsync;
	}

	/* we need to lock the monitor receive ring, since it
	 * is the target of both tx and rx traffic from the monitored
	 * adapter
	 */
	mtx_lock(&mkring->q_lock);
	/* get the free slots available on the monitor ring */
	i = mkring->nr_hwtail;
	busy = i - mkring->nr_hwcur;
	if (busy < 0)
		busy += mkring->nkr_num_slots;
	free_slots = mlim - busy;

	if (!free_slots)
		goto out;

	/* swap min(free_slots, rel_slots) slots */
	if (free_slots < rel_slots) {
		/* not enough room: skip the oldest released slots */
		beg += (rel_slots - free_slots);
		rel_slots = free_slots;
	}
	/* beg may have gone past the end of the ring, wrap it around */
	if (unlikely(beg >= kring->nkr_num_slots))
		beg -= kring->nkr_num_slots;

	sent = rel_slots;
	for ( ; rel_slots; rel_slots--) {
		struct netmap_slot *s = &ring->slot[beg];
		struct netmap_slot *ms = &mring->slot[i];
		uint32_t tmp;

		/* exchange the buffers of the two slots */
		tmp = ms->buf_idx;
		ms->buf_idx = s->buf_idx;
		s->buf_idx = tmp;
		nm_prdis(5, "beg %d buf_idx %d", beg, tmp);

		tmp = ms->len;
		ms->len = s->len;
		s->len = tmp;

		/* the monitor slot carries NS_TXMON only for tx traffic;
		 * the monitored slot is flagged as having a new buffer
		 */
		ms->flags = (s->flags & ~NS_TXMON) | txmon;
		s->flags |= NS_BUF_CHANGED;

		beg = nm_next(beg, lim);
		i = nm_next(i, mlim);

	}
	/* barrier issued before the new tail is published */
	mb();
	mkring->nr_hwtail = i;

out:
	mtx_unlock(&mkring->q_lock);

	if (sent) {
		/* notify the new frames to the monitor */
		mkring->nm_notify(mkring, 0);
	}

out_rxsync:
	if (tx == NR_RX)
		error = kring->mon_sync(kring, flags);

	return error;
}
690
/* callback used to replace the nm_sync callback in the monitored tx rings.
 * Thin wrapper: runs the common zero-copy sync code in the NR_TX
 * direction and returns its result.
 */
static int
netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags)
{
	return netmap_zmon_parent_sync(kring, flags, NR_TX);
}
697
/* callback used to replace the nm_sync callback in the monitored rx rings.
 * Thin wrapper: runs the common zero-copy sync code in the NR_RX
 * direction and returns its result.
 */
static int
netmap_zmon_parent_rxsync(struct netmap_kring *kring, int flags)
{
	return netmap_zmon_parent_sync(kring, flags, NR_RX);
}
704
/* nm_register callback for zero-copy monitors: delegates to the
 * common registration code with the zero-copy flag set.
 */
static int
netmap_zmon_reg(struct netmap_adapter *na, int onoff)
{
	return netmap_monitor_reg_common(na, onoff, 1 /* zcopy */);
}
710
711/* nm_dtor callback for monitors */
712static void
713netmap_zmon_dtor(struct netmap_adapter *na)
714{
715	struct netmap_monitor_adapter *mna =
716		(struct netmap_monitor_adapter *)na;
717	struct netmap_priv_d *priv = &mna->priv;
718	struct netmap_adapter *pna = priv->np_na;
719
720	netmap_adapter_put(pna);
721}
722
723/*
724 ****************************************************************
725 * functions specific for copy monitors
726 ****************************************************************
727 */
728
/* Copy new_slots slots of kring, starting at index first_new, to each
 * copy monitor attached to kring, then notify the monitors that
 * received at least one frame.
 * If a monitor ring has fewer free slots than new_slots, only the last
 * free_slots frames are copied to it; frames longer than the room
 * available in the monitor buffer are truncated.
 */
static void
netmap_monitor_parent_sync(struct netmap_kring *kring, u_int first_new, int new_slots)
{
	u_int j;
	uint16_t txmon = kring->tx == NR_TX ? NS_TXMON : 0;

	for (j = 0; j < kring->n_monitors; j++) {
		struct netmap_kring *mkring = kring->monitors[j];
		u_int i, mlim, beg;
		int free_slots, busy, sent = 0, m;
		u_int lim = kring->nkr_num_slots - 1;
		struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
		u_int max_len;
		mlim = mkring->nkr_num_slots - 1;

		/* we need to lock the monitor receive ring, since it
		 * is the target of both tx and rx traffic from the monitored
		 * adapter
		 */
		mtx_lock(&mkring->q_lock);
		/* get the free slots available on the monitor ring */
		i = mkring->nr_hwtail;
		busy = i - mkring->nr_hwcur;
		if (busy < 0)
			busy += mkring->nkr_num_slots;
		free_slots = mlim - busy;

		if (!free_slots)
			goto out;

		/* copy min(free_slots, new_slots) slots */
		m = new_slots;
		beg = first_new;
		if (free_slots < m) {
			/* not enough room: skip the oldest new slots */
			beg += (m - free_slots);
			if (beg >= kring->nkr_num_slots)
				beg -= kring->nkr_num_slots;
			m = free_slots;
		}

		for ( ; m; m--) {
			struct netmap_slot *s = &ring->slot[beg];
			struct netmap_slot *ms = &mring->slot[i];
			u_int copy_len = s->len;
			char *src = NMB_O(kring, s),
			     *dst = NMB_O(mkring, ms);

			/* never write past the end of the monitor buffer */
			max_len = NETMAP_BUF_SIZE(mkring->na) - nm_get_offset(mkring, ms);
			if (unlikely(copy_len > max_len)) {
				nm_prlim(5, "%s->%s: truncating %d to %d", kring->name,
						mkring->name, copy_len, max_len);
				copy_len = max_len;
			}

			memcpy(dst, src, copy_len);
			ms->len = copy_len;
			/* the monitor slot carries NS_TXMON only for tx traffic */
			ms->flags = (s->flags & ~NS_TXMON) | txmon;
			sent++;

			beg = nm_next(beg, lim);
			i = nm_next(i, mlim);
		}
		/* barrier issued before the new tail is published */
		mb();
		mkring->nr_hwtail = i;
	out:
		mtx_unlock(&mkring->q_lock);

		if (sent) {
			/* notify the new frames to the monitor */
			mkring->nm_notify(mkring, 0);
		}
	}
}
802
803/* callback used to replace the nm_sync callback in the monitored tx rings */
804static int
805netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
806{
807	u_int first_new;
808	int new_slots;
809
810	/* get the new slots */
811	if (kring->n_monitors > 0) {
812		first_new = kring->nr_hwcur;
813		new_slots = kring->rhead - first_new;
814		if (new_slots < 0)
815			new_slots += kring->nkr_num_slots;
816		if (new_slots)
817			netmap_monitor_parent_sync(kring, first_new, new_slots);
818	}
819	if (kring->zmon_list[NR_TX].next != NULL) {
820		return netmap_zmon_parent_txsync(kring, flags);
821	}
822	return kring->mon_sync(kring, flags);
823}
824
825/* callback used to replace the nm_sync callback in the monitored rx rings */
826static int
827netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
828{
829	u_int first_new;
830	int new_slots, error;
831
832	/* get the new slots */
833	if (kring->zmon_list[NR_RX].next != NULL) {
834		error = netmap_zmon_parent_rxsync(kring, flags);
835	} else {
836		error =  kring->mon_sync(kring, flags);
837	}
838	if (error)
839		return error;
840	if (kring->n_monitors > 0) {
841		first_new = kring->mon_tail;
842		new_slots = kring->nr_hwtail - first_new;
843		if (new_slots < 0)
844			new_slots += kring->nkr_num_slots;
845		if (new_slots)
846			netmap_monitor_parent_sync(kring, first_new, new_slots);
847		kring->mon_tail = kring->nr_hwtail;
848	}
849	return 0;
850}
851
/* callback used to replace the nm_notify() callback in the monitored rx rings.
 * If the kring can be acquired, copy monitors (if any) are fed through
 * a forced rxsync and then the notification is passed on to the
 * original callback. If the kring cannot be acquired the whole
 * operation is skipped and NM_IRQ_COMPLETED is returned.
 */
static int
netmap_monitor_parent_notify(struct netmap_kring *kring, int flags)
{
	int (*notify)(struct netmap_kring*, int);
	nm_prdis(5, "%s %x", kring->name, flags);
	/* ?xsync callbacks have tryget called by their callers
	 * (NIOCREGIF and poll()), but here we have to call it
	 * by ourself
	 */
	if (nm_kr_tryget(kring, 0, NULL)) {
		/* in all cases, just skip the sync */
		return NM_IRQ_COMPLETED;
	}
	if (kring->n_monitors > 0) {
		/* the return value of the sync is deliberately not checked */
		netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ);
	}
	/* pick the callback to forward to while we still hold the kring */
	if (nm_monitor_none(kring)) {
		/* we are no longer monitoring this ring, so both
		 * mon_sync and mon_notify are NULL
		 */
		notify = kring->nm_notify;
	} else {
		notify = kring->mon_notify;
	}
	nm_kr_put(kring);
	return notify(kring, flags);
}
880
881
/* nm_register callback for copy monitors: delegates to the common
 * registration code with the zero-copy flag cleared.
 */
static int
netmap_monitor_reg(struct netmap_adapter *na, int onoff)
{
	return netmap_monitor_reg_common(na, onoff, 0 /* no zcopy */);
}
887
888static void
889netmap_monitor_dtor(struct netmap_adapter *na)
890{
891	struct netmap_monitor_adapter *mna =
892		(struct netmap_monitor_adapter *)na;
893	struct netmap_priv_d *priv = &mna->priv;
894	struct netmap_adapter *pna = priv->np_na;
895
896	netmap_adapter_put(pna);
897}
898
899
900/* check if req is a request for a monitor adapter that we can satisfy */
901int
902netmap_get_monitor_na(struct nmreq_header *hdr, struct netmap_adapter **na,
903			struct netmap_mem_d *nmd, int create)
904{
905	struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
906	struct nmreq_register preq;
907	struct netmap_adapter *pna; /* parent adapter */
908	struct netmap_monitor_adapter *mna;
909	if_t ifp = NULL;
910	int  error;
911	int zcopy = (req->nr_flags & NR_ZCOPY_MON);
912
913	if (zcopy) {
914		req->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX);
915	}
916	if ((req->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
917		nm_prdis("not a monitor");
918		return 0;
919	}
920	/* this is a request for a monitor adapter */
921
922	nm_prdis("flags %lx", req->nr_flags);
923
924	/* First, try to find the adapter that we want to monitor.
925	 * We use the same req, after we have turned off the monitor flags.
926	 * In this way we can potentially monitor everything netmap understands,
927	 * except other monitors.
928	 */
929	memcpy(&preq, req, sizeof(preq));
930	preq.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON);
931	hdr->nr_body = (uintptr_t)&preq;
932	error = netmap_get_na(hdr, &pna, &ifp, nmd, create);
933	hdr->nr_body = (uintptr_t)req;
934	if (error) {
935		nm_prerr("parent lookup failed: %d", error);
936		return error;
937	}
938	nm_prdis("found parent: %s", pna->name);
939
940	if (!nm_netmap_on(pna)) {
941		/* parent not in netmap mode */
942		/* XXX we can wait for the parent to enter netmap mode,
943		 * by intercepting its nm_register callback (2014-03-16)
944		 */
945		nm_prerr("%s not in netmap mode", pna->name);
946		error = EINVAL;
947		goto put_out;
948	}
949
950	mna = nm_os_malloc(sizeof(*mna));
951	if (mna == NULL) {
952		error = ENOMEM;
953		goto put_out;
954	}
955	mna->priv.np_na = pna;
956
957	/* grab all the rings we need in the parent */
958	error = netmap_interp_ringid(&mna->priv, hdr);
959	if (error) {
960		nm_prerr("ringid error");
961		goto free_out;
962	}
963	snprintf(mna->up.name, sizeof(mna->up.name), "%s/%s%s%s#%lu", pna->name,
964			zcopy ? "z" : "",
965			(req->nr_flags & NR_MONITOR_RX) ? "r" : "",
966			(req->nr_flags & NR_MONITOR_TX) ? "t" : "",
967			pna->monitor_id++);
968
969	/* the monitor supports the host rings iff the parent does */
970	mna->up.na_flags |= (pna->na_flags & NAF_HOST_RINGS) & ~NAF_OFFSETS;
971	if (!zcopy)
972		mna->up.na_flags |= NAF_OFFSETS;
973	/* a do-nothing txsync: monitors cannot be used to inject packets */
974	mna->up.nm_txsync = netmap_monitor_txsync;
975	mna->up.nm_rxsync = netmap_monitor_rxsync;
976	mna->up.nm_krings_create = netmap_monitor_krings_create;
977	mna->up.nm_krings_delete = netmap_monitor_krings_delete;
978	mna->up.num_tx_rings = 1; // XXX what should we do here with chained zmons?
979	/* we set the number of our rx_rings to be max(num_rx_rings, num_rx_rings)
980	 * in the parent
981	 */
982	mna->up.num_rx_rings = pna->num_rx_rings;
983	if (pna->num_tx_rings > pna->num_rx_rings)
984		mna->up.num_rx_rings = pna->num_tx_rings;
985	/* by default, the number of slots is the same as in
986	 * the parent rings, but the user may ask for a different
987	 * number
988	 */
989	mna->up.num_tx_desc = req->nr_tx_slots;
990	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
991			1, NM_MONITOR_MAXSLOTS, NULL);
992	mna->up.num_rx_desc = req->nr_rx_slots;
993	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
994			1, NM_MONITOR_MAXSLOTS, NULL);
995	if (zcopy) {
996		mna->up.nm_register = netmap_zmon_reg;
997		mna->up.nm_dtor = netmap_zmon_dtor;
998		/* to have zero copy, we need to use the same memory allocator
999		 * as the monitored port
1000		 */
1001		mna->up.nm_mem = netmap_mem_get(pna->nm_mem);
1002		/* and the allocator cannot be changed */
1003		mna->up.na_flags |= NAF_MEM_OWNER;
1004	} else {
1005		mna->up.nm_register = netmap_monitor_reg;
1006		mna->up.nm_dtor = netmap_monitor_dtor;
1007		mna->up.nm_mem = netmap_mem_private_new(
1008				mna->up.num_tx_rings,
1009				mna->up.num_tx_desc,
1010				mna->up.num_rx_rings,
1011				mna->up.num_rx_desc,
1012				0, /* extra bufs */
1013				0, /* pipes */
1014				&error);
1015		if (mna->up.nm_mem == NULL)
1016			goto put_out;
1017	}
1018
1019	error = netmap_attach_common(&mna->up);
1020	if (error) {
1021		nm_prerr("netmap_attach_common failed");
1022		goto mem_put_out;
1023	}
1024
1025	/* remember the traffic directions we have to monitor */
1026	mna->flags = (req->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON));
1027
1028	*na = &mna->up;
1029	netmap_adapter_get(*na);
1030
1031	/* keep the reference to the parent */
1032	nm_prdis("monitor ok");
1033
1034	/* drop the reference to the ifp, if any */
1035	if (ifp)
1036		if_rele(ifp);
1037
1038	return 0;
1039
1040mem_put_out:
1041	netmap_mem_put(mna->up.nm_mem);
1042free_out:
1043	nm_os_free(mna);
1044put_out:
1045	netmap_unget_na(pna, ifp);
1046	return error;
1047}
1048
1049
1050#endif /* WITH_MONITOR */
1051