netmap_monitor.c revision 270252
1/*
2 * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *   1. Redistributions of source code must retain the above copyright
8 *      notice, this list of conditions and the following disclaimer.
9 *   2. Redistributions in binary form must reproduce the above copyright
10 *      notice, this list of conditions and the following disclaimer in the
11 *      documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26/*
27 * $FreeBSD: stable/10/sys/dev/netmap/netmap_monitor.c 270252 2014-08-20 23:34:36Z luigi $
28 *
29 * Monitors
30 *
31 * netmap monitors can be used to do zero-copy monitoring of network traffic
32 * on another adapter, when the latter adapter is working in netmap mode.
33 *
34 * Monitors offer to userspace the same interface as any other netmap port,
35 * with as many pairs of netmap rings as the monitored adapter.
36 * However, only the rx rings are actually used. Each monitor rx ring receives
37 * the traffic transiting on both the tx and rx corresponding rings in the
38 * monitored adapter. During registration, the user can choose if she wants
39 * to intercept tx only, rx only, or both tx and rx traffic.
40 *
41 * The monitor only sees the frames after they have been consumed in the
42 * monitored adapter:
43 *
44 *  - For tx traffic, this is after the slots containing the frames have been
45 *    marked as free. Note that this may happen at a considerable delay after
46 *    frame transmission, since freeing of slots is often done lazily.
47 *
48 *  - For rx traffic, this is after the consumer on the monitored adapter
49 *    has released them. In most cases, the consumer is a userspace
50 *    application which may have modified the frame contents.
51 *
52 * If the monitor is not able to cope with the stream of frames, excess traffic
53 * will be dropped.
54 *
55 * Each ring can be monitored by at most one monitor. This may change in the
56 * future, if we implement monitor chaining.
57 *
58 */
59
60
61#if defined(__FreeBSD__)
62#include <sys/cdefs.h> /* prerequisite */
63
64#include <sys/types.h>
65#include <sys/errno.h>
66#include <sys/param.h>	/* defines used in kernel.h */
67#include <sys/kernel.h>	/* types used in module initialization */
68#include <sys/malloc.h>
69#include <sys/poll.h>
70#include <sys/lock.h>
71#include <sys/rwlock.h>
72#include <sys/selinfo.h>
73#include <sys/sysctl.h>
74#include <sys/socket.h> /* sockaddrs */
75#include <net/if.h>
76#include <net/if_var.h>
77#include <machine/bus.h>	/* bus_dmamap_* */
78#include <sys/refcount.h>
79
80
81#elif defined(linux)
82
83#include "bsd_glue.h"
84
85#elif defined(__APPLE__)
86
87#warning OSX support is only partial
88#include "osx_glue.h"
89
90#else
91
92#error	Unsupported platform
93
94#endif /* unsupported */
95
96/*
97 * common headers
98 */
99
100#include <net/netmap.h>
101#include <dev/netmap/netmap_kern.h>
102#include <dev/netmap/netmap_mem2.h>
103
104#ifdef WITH_MONITOR
105
/*
 * Upper bound on the number of slots in a monitor ring: the tx/rx slot
 * counts requested by the user are clamped to [1, NM_MONITOR_MAXSLOTS]
 * when the monitor adapter is created.
 */
#define NM_MONITOR_MAXSLOTS	4096
107
108/* monitor works by replacing the nm_sync callbacks in the monitored rings.
109 * The actions to be performed are the same on both tx and rx rings, so we
110 * have collected them here
111 */
112static int
113netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr)
114{
115	struct netmap_monitor_adapter *mna = kring->monitor;
116	struct netmap_kring *mkring = &mna->up.rx_rings[kring->ring_id];
117	struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
118	int error;
119	int rel_slots, free_slots, busy;
120	u_int beg, end, i;
121	u_int lim = kring->nkr_num_slots - 1,
122	      mlim = mkring->nkr_num_slots - 1;
123
124	/* get the relased slots (rel_slots) */
125	beg = *ringptr;
126	error = kring->save_sync(kring, flags);
127	if (error)
128		return error;
129	end = *ringptr;
130	rel_slots = end - beg;
131	if (rel_slots < 0)
132		rel_slots += kring->nkr_num_slots;
133
134	if (!rel_slots) {
135		return 0;
136	}
137
138	/* we need to lock the monitor receive ring, since it
139	 * is the target of bot tx and rx traffic from the monitored
140	 * adapter
141	 */
142	mtx_lock(&mkring->q_lock);
143	/* get the free slots available on the monitor ring */
144	i = mkring->nr_hwtail;
145	busy = i - mkring->nr_hwcur;
146	if (busy < 0)
147		busy += mkring->nkr_num_slots;
148	free_slots = mlim - busy;
149
150	if (!free_slots) {
151		mtx_unlock(&mkring->q_lock);
152		return 0;
153	}
154
155	/* swap min(free_slots, rel_slots) slots */
156	if (free_slots < rel_slots) {
157		beg += (rel_slots - free_slots);
158		if (beg > lim)
159			beg = 0;
160		rel_slots = free_slots;
161	}
162
163	for ( ; rel_slots; rel_slots--) {
164		struct netmap_slot *s = &ring->slot[beg];
165		struct netmap_slot *ms = &mring->slot[i];
166		uint32_t tmp;
167
168		tmp = ms->buf_idx;
169		ms->buf_idx = s->buf_idx;
170		s->buf_idx = tmp;
171
172		tmp = ms->len;
173		ms->len = s->len;
174		s->len = tmp;
175
176		s->flags |= NS_BUF_CHANGED;
177
178		beg = nm_next(beg, lim);
179		i = nm_next(i, mlim);
180
181	}
182	wmb();
183	mkring->nr_hwtail = i;
184
185	mtx_unlock(&mkring->q_lock);
186	/* notify the new frames to the monitor */
187	mna->up.nm_notify(&mna->up, mkring->ring_id, NR_RX, 0);
188	return 0;
189}
190
191/* callback used to replace the nm_sync callback in the monitored tx rings */
192static int
193netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
194{
195        ND("%s %x", kring->name, flags);
196        return netmap_monitor_parent_sync(kring, flags, &kring->nr_hwtail);
197}
198
199/* callback used to replace the nm_sync callback in the monitored rx rings */
200static int
201netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
202{
203        ND("%s %x", kring->name, flags);
204        return netmap_monitor_parent_sync(kring, flags, &kring->rcur);
205}
206
207/* nm_sync callback for the monitor's own tx rings.
208 * This makes no sense and always returns error
209 */
210static int
211netmap_monitor_txsync(struct netmap_kring *kring, int flags)
212{
213        D("%s %x", kring->name, flags);
214	return EIO;
215}
216
217/* nm_sync callback for the monitor's own rx rings.
218 * Note that the lock in netmap_monitor_parent_sync only protects
219 * writers among themselves. Synchronization between writers
220 * (i.e., netmap_monitor_parent_txsync and netmap_monitor_parent_rxsync)
221 * and readers (i.e., netmap_monitor_rxsync) relies on memory barriers.
222 */
223static int
224netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
225{
226        ND("%s %x", kring->name, flags);
227	kring->nr_hwcur = kring->rcur;
228	rmb();
229	nm_rxsync_finalize(kring);
230        return 0;
231}
232
/*
 * nm_krings_create callback for monitors.
 * The default krings constructor for hardware adapters could be used
 * as well, but monitors do not need the mbq, so netmap_krings_create()
 * is called directly with 0 as its second argument.
 */
static int
netmap_monitor_krings_create(struct netmap_adapter *na)
{
	int error = netmap_krings_create(na, 0);

	return error;
}
242
243
244/* nm_register callback for monitors.
245 *
246 * On registration, replace the nm_sync callbacks in the monitored
247 * rings with our own, saving the previous ones in the monitored
248 * rings themselves, where they are used by netmap_monitor_parent_sync.
249 *
250 * On de-registration, restore the original callbacks. We need to
251 * stop traffic while we are doing this, since the monitored adapter may
252 * have already started executing a netmap_monitor_parent_sync
253 * and may not like the kring->save_sync pointer to become NULL.
254 */
255static int
256netmap_monitor_reg(struct netmap_adapter *na, int onoff)
257{
258	struct netmap_monitor_adapter *mna =
259		(struct netmap_monitor_adapter *)na;
260	struct netmap_priv_d *priv = &mna->priv;
261	struct netmap_adapter *pna = priv->np_na;
262	struct netmap_kring *kring;
263	int i;
264
265	ND("%p: onoff %d", na, onoff);
266	if (onoff) {
267		if (!nm_netmap_on(pna)) {
268			/* parent left netmap mode, fatal */
269			return ENXIO;
270		}
271		if (mna->flags & NR_MONITOR_TX) {
272			for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
273				kring = &pna->tx_rings[i];
274				kring->save_sync = kring->nm_sync;
275				kring->nm_sync = netmap_monitor_parent_txsync;
276			}
277		}
278		if (mna->flags & NR_MONITOR_RX) {
279			for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
280				kring = &pna->rx_rings[i];
281				kring->save_sync = kring->nm_sync;
282				kring->nm_sync = netmap_monitor_parent_rxsync;
283			}
284		}
285		na->na_flags |= NAF_NETMAP_ON;
286	} else {
287		if (!nm_netmap_on(pna)) {
288			/* parent left netmap mode, nothing to restore */
289			return 0;
290		}
291		na->na_flags &= ~NAF_NETMAP_ON;
292		if (mna->flags & NR_MONITOR_TX) {
293			for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
294				netmap_set_txring(pna, i, 1 /* stopped */);
295				kring = &pna->tx_rings[i];
296				kring->nm_sync = kring->save_sync;
297				kring->save_sync = NULL;
298				netmap_set_txring(pna, i, 0 /* enabled */);
299			}
300		}
301		if (mna->flags & NR_MONITOR_RX) {
302			for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
303				netmap_set_rxring(pna, i, 1 /* stopped */);
304				kring = &pna->rx_rings[i];
305				kring->nm_sync = kring->save_sync;
306				kring->save_sync = NULL;
307				netmap_set_rxring(pna, i, 0 /* enabled */);
308			}
309		}
310	}
311	return 0;
312}

/*
 * nm_krings_delete callback for monitors.
 * Nothing monitor-specific to undo: just forward to netmap_krings_delete().
 */
static void
netmap_monitor_krings_delete(struct netmap_adapter *na)
{
	netmap_krings_delete(na);
	return;
}
319
320
321/* nm_dtor callback for monitors */
322static void
323netmap_monitor_dtor(struct netmap_adapter *na)
324{
325	struct netmap_monitor_adapter *mna =
326		(struct netmap_monitor_adapter *)na;
327	struct netmap_priv_d *priv = &mna->priv;
328	struct netmap_adapter *pna = priv->np_na;
329	int i;
330
331	ND("%p", na);
332	if (nm_netmap_on(pna)) {
333		/* parent still in netmap mode, mark its krings as free */
334		if (mna->flags & NR_MONITOR_TX) {
335			for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
336				pna->tx_rings[i].monitor = NULL;
337			}
338		}
339		if (mna->flags & NR_MONITOR_RX) {
340			for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
341				pna->rx_rings[i].monitor = NULL;
342			}
343		}
344	}
345	netmap_adapter_put(pna);
346}
347
348
349/* check if nmr is a request for a monitor adapter that we can satisfy */
350int
351netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
352{
353	struct nmreq pnmr;
354	struct netmap_adapter *pna; /* parent adapter */
355	struct netmap_monitor_adapter *mna;
356	int i, error;
357
358	if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
359		ND("not a monitor");
360		return 0;
361	}
362	/* this is a request for a monitor adapter */
363
364	D("flags %x", nmr->nr_flags);
365
366	mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
367	if (mna == NULL) {
368		D("memory error");
369		return ENOMEM;
370	}
371
372	/* first, try to find the adapter that we want to monitor
373	 * We use the same nmr, after we have turned off the monitor flags.
374	 * In this way we can potentially monitor everything netmap understands,
375	 * except other monitors.
376	 */
377	memcpy(&pnmr, nmr, sizeof(pnmr));
378	pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX);
379	error = netmap_get_na(&pnmr, &pna, create);
380	if (error) {
381		D("parent lookup failed: %d", error);
382		return error;
383	}
384	D("found parent: %s", pna->name);
385
386	if (!nm_netmap_on(pna)) {
387		/* parent not in netmap mode */
388		/* XXX we can wait for the parent to enter netmap mode,
389		 * by intercepting its nm_register callback (2014-03-16)
390		 */
391		D("%s not in netmap mode", pna->name);
392		error = EINVAL;
393		goto put_out;
394	}
395
396	/* grab all the rings we need in the parent */
397	mna->priv.np_na = pna;
398	error = netmap_interp_ringid(&mna->priv, nmr->nr_ringid, nmr->nr_flags);
399	if (error) {
400		D("ringid error");
401		goto put_out;
402	}
403	if (nmr->nr_flags & NR_MONITOR_TX) {
404		for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) {
405			struct netmap_kring *kring = &pna->tx_rings[i];
406			if (kring->monitor) {
407				error = EBUSY;
408				D("ring busy");
409				goto release_out;
410			}
411			kring->monitor = mna;
412		}
413	}
414	if (nmr->nr_flags & NR_MONITOR_RX) {
415		for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) {
416			struct netmap_kring *kring = &pna->rx_rings[i];
417			if (kring->monitor) {
418				error = EBUSY;
419				D("ring busy");
420				goto release_out;
421			}
422			kring->monitor = mna;
423		}
424	}
425
426	snprintf(mna->up.name, sizeof(mna->up.name), "mon:%s", pna->name);
427
428	/* the monitor supports the host rings iff the parent does */
429	mna->up.na_flags = (pna->na_flags & NAF_HOST_RINGS);
430	mna->up.nm_txsync = netmap_monitor_txsync;
431	mna->up.nm_rxsync = netmap_monitor_rxsync;
432	mna->up.nm_register = netmap_monitor_reg;
433	mna->up.nm_dtor = netmap_monitor_dtor;
434	mna->up.nm_krings_create = netmap_monitor_krings_create;
435	mna->up.nm_krings_delete = netmap_monitor_krings_delete;
436	mna->up.nm_mem = pna->nm_mem;
437	mna->up.na_lut = pna->na_lut;
438	mna->up.na_lut_objtotal = pna->na_lut_objtotal;
439	mna->up.na_lut_objsize = pna->na_lut_objsize;
440
441	mna->up.num_tx_rings = 1; // XXX we don't need it, but field can't be zero
442	/* we set the number of our rx_rings to be max(num_rx_rings, num_rx_rings)
443	 * in the parent
444	 */
445	mna->up.num_rx_rings = pna->num_rx_rings;
446	if (pna->num_tx_rings > pna->num_rx_rings)
447		mna->up.num_rx_rings = pna->num_tx_rings;
448	/* by default, the number of slots is the same as in
449	 * the parent rings, but the user may ask for a different
450	 * number
451	 */
452	mna->up.num_tx_desc = nmr->nr_tx_slots;
453	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
454			1, NM_MONITOR_MAXSLOTS, NULL);
455	mna->up.num_rx_desc = nmr->nr_rx_slots;
456	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
457			1, NM_MONITOR_MAXSLOTS, NULL);
458	error = netmap_attach_common(&mna->up);
459	if (error) {
460		D("attach_common error");
461		goto release_out;
462	}
463
464	/* remember the traffic directions we have to monitor */
465	mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX));
466
467	*na = &mna->up;
468	netmap_adapter_get(*na);
469
470	/* write the configuration back */
471	nmr->nr_tx_rings = mna->up.num_tx_rings;
472	nmr->nr_rx_rings = mna->up.num_rx_rings;
473	nmr->nr_tx_slots = mna->up.num_tx_desc;
474	nmr->nr_rx_slots = mna->up.num_rx_desc;
475
476	/* keep the reference to the parent */
477	D("monitor ok");
478
479	return 0;
480
481release_out:
482	D("monitor error");
483	for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) {
484		if (pna->tx_rings[i].monitor == mna)
485			pna->tx_rings[i].monitor = NULL;
486	}
487	for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) {
488		if (pna->rx_rings[i].monitor == mna)
489			pna->rx_rings[i].monitor = NULL;
490	}
491put_out:
492	netmap_adapter_put(pna);
493	free(mna, M_DEVBUF);
494	return error;
495}
496
497
498#endif /* WITH_MONITOR */
499