pcap-netmap.c revision 335640
1/*
2 * Copyright (C) 2014 Luigi Rizzo. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 *   1. Redistributions of source code must retain the above copyright
9 *      notice, this list of conditions and the following disclaimer.
10 *   2. Redistributions in binary form must reproduce the above copyright
11 *      notice, this list of conditions and the following disclaimer in the
12 *      documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#ifdef HAVE_CONFIG_H
28#include <config.h>
29#endif
30
31#include <poll.h>
32#include <ctype.h>
33#include <errno.h>
34#include <netdb.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include <unistd.h>
39
40#define NETMAP_WITH_LIBS
41#include <net/netmap_user.h>
42
43#include "pcap-int.h"
44#include "pcap-netmap.h"
45
#if !defined(__FreeBSD__)
  /*
   * On FreeBSD this file uses IFF_PPROMISC, whose bit is carried in
   * ifr_flagshigh.  On every other platform, alias that name to the
   * ordinary IFF_PROMISC so the rest of the file can refer to
   * IFF_PPROMISC unconditionally.
   *
   * XXX - DragonFly BSD?
   */
  #define IFF_PPROMISC	IFF_PROMISC
#endif /* !defined(__FreeBSD__) */
55
56struct pcap_netmap {
57	struct nm_desc *d;	/* pointer returned by nm_open() */
58	pcap_handler cb;	/* callback and argument */
59	u_char *cb_arg;
60	int must_clear_promisc;	/* flag */
61	uint64_t rx_pkts;	/* # of pkts received before the filter */
62};
63
64
65static int
66pcap_netmap_stats(pcap_t *p, struct pcap_stat *ps)
67{
68	struct pcap_netmap *pn = p->priv;
69
70	ps->ps_recv = pn->rx_pkts;
71	ps->ps_drop = 0;
72	ps->ps_ifdrop = 0;
73	return 0;
74}
75
76
77static void
78pcap_netmap_filter(u_char *arg, struct pcap_pkthdr *h, const u_char *buf)
79{
80	pcap_t *p = (pcap_t *)arg;
81	struct pcap_netmap *pn = p->priv;
82	const struct bpf_insn *pc = p->fcode.bf_insns;
83
84	++pn->rx_pkts;
85	if (pc == NULL || bpf_filter(pc, buf, h->len, h->caplen))
86		pn->cb(pn->cb_arg, h, buf);
87}
88
89
90static int
91pcap_netmap_dispatch(pcap_t *p, int cnt, pcap_handler cb, u_char *user)
92{
93	int ret;
94	struct pcap_netmap *pn = p->priv;
95	struct nm_desc *d = pn->d;
96	struct pollfd pfd = { .fd = p->fd, .events = POLLIN, .revents = 0 };
97
98	pn->cb = cb;
99	pn->cb_arg = user;
100
101	for (;;) {
102		if (p->break_loop) {
103			p->break_loop = 0;
104			return PCAP_ERROR_BREAK;
105		}
106		/* nm_dispatch won't run forever */
107
108		ret = nm_dispatch((void *)d, cnt, (void *)pcap_netmap_filter, (void *)p);
109		if (ret != 0)
110			break;
111		errno = 0;
112		ret = poll(&pfd, 1, p->opt.timeout);
113	}
114	return ret;
115}
116
117
118/* XXX need to check the NIOCTXSYNC/poll */
119static int
120pcap_netmap_inject(pcap_t *p, const void *buf, size_t size)
121{
122	struct pcap_netmap *pn = p->priv;
123	struct nm_desc *d = pn->d;
124
125	return nm_inject(d, buf, size);
126}
127
128
129static int
130pcap_netmap_ioctl(pcap_t *p, u_long what, uint32_t *if_flags)
131{
132	struct pcap_netmap *pn = p->priv;
133	struct nm_desc *d = pn->d;
134	struct ifreq ifr;
135	int error, fd = d->fd;
136
137#ifdef linux
138	fd = socket(AF_INET, SOCK_DGRAM, 0);
139	if (fd < 0) {
140		fprintf(stderr, "Error: cannot get device control socket.\n");
141		return -1;
142	}
143#endif /* linux */
144	bzero(&ifr, sizeof(ifr));
145	strncpy(ifr.ifr_name, d->req.nr_name, sizeof(ifr.ifr_name));
146	switch (what) {
147	case SIOCSIFFLAGS:
148		/*
149		 * The flags we pass in are 32-bit and unsigned.
150		 *
151		 * On most if not all UN*Xes, ifr_flags is 16-bit and
152		 * signed, and the result of assigning a longer
153		 * unsigned value to a shorter signed value is
154		 * implementation-defined (even if, in practice, it'll
155		 * do what's intended on all platforms we support
156		 * result of assigning a 32-bit unsigned value).
157		 * So we mask out the upper 16 bits.
158		 */
159		ifr.ifr_flags = *if_flags & 0xffff;
160#ifdef __FreeBSD__
161		/*
162		 * In FreeBSD, we need to set the high-order flags,
163		 * as we're using IFF_PPROMISC, which is in those bits.
164		 *
165		 * XXX - DragonFly BSD?
166		 */
167		ifr.ifr_flagshigh = *if_flags >> 16;
168#endif /* __FreeBSD__ */
169		break;
170	}
171	error = ioctl(fd, what, &ifr);
172	if (!error) {
173		switch (what) {
174		case SIOCGIFFLAGS:
175			/*
176			 * The flags we return are 32-bit.
177			 *
178			 * On most if not all UN*Xes, ifr_flags is
179			 * 16-bit and signed, and will get sign-
180			 * extended, so that the upper 16 bits of
181			 * those flags will be forced on.  So we
182			 * mask out the upper 16 bits of the
183			 * sign-extended value.
184			 */
185			*if_flags = ifr.ifr_flags & 0xffff;
186#ifdef __FreeBSD__
187			/*
188			 * In FreeBSD, we need to return the
189			 * high-order flags, as we're using
190			 * IFF_PPROMISC, which is in those bits.
191			 *
192			 * XXX - DragonFly BSD?
193			 */
194			*if_flags |= (ifr.ifr_flagshigh << 16);
195#endif /* __FreeBSD__ */
196		}
197	}
198#ifdef linux
199	close(fd);
200#endif /* linux */
201	return error ? -1 : 0;
202}
203
204
205static void
206pcap_netmap_close(pcap_t *p)
207{
208	struct pcap_netmap *pn = p->priv;
209	struct nm_desc *d = pn->d;
210	uint32_t if_flags = 0;
211
212	if (pn->must_clear_promisc) {
213		pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */
214		if (if_flags & IFF_PPROMISC) {
215			if_flags &= ~IFF_PPROMISC;
216			pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags);
217		}
218	}
219	nm_close(d);
220	pcap_cleanup_live_common(p);
221}
222
223
224static int
225pcap_netmap_activate(pcap_t *p)
226{
227	struct pcap_netmap *pn = p->priv;
228	struct nm_desc *d;
229	uint32_t if_flags = 0;
230
231	d = nm_open(p->opt.device, NULL, 0, NULL);
232	if (d == NULL) {
233		pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
234		    errno, "netmap open: cannot access %s",
235		    p->opt.device);
236		pcap_cleanup_live_common(p);
237		return (PCAP_ERROR);
238	}
239#if 0
240	fprintf(stderr, "%s device %s priv %p fd %d ports %d..%d\n",
241	    __FUNCTION__, p->opt.device, d, d->fd,
242	    d->first_rx_ring, d->last_rx_ring);
243#endif
244	pn->d = d;
245	p->fd = d->fd;
246
247	/*
248	 * Turn a negative snapshot value (invalid), a snapshot value of
249	 * 0 (unspecified), or a value bigger than the normal maximum
250	 * value, into the maximum allowed value.
251	 *
252	 * If some application really *needs* a bigger snapshot
253	 * length, we should just increase MAXIMUM_SNAPLEN.
254	 */
255	if (p->snapshot <= 0 || p->snapshot > MAXIMUM_SNAPLEN)
256		p->snapshot = MAXIMUM_SNAPLEN;
257
258	if (p->opt.promisc && !(d->req.nr_ringid & NETMAP_SW_RING)) {
259		pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */
260		if (!(if_flags & IFF_PPROMISC)) {
261			pn->must_clear_promisc = 1;
262			if_flags |= IFF_PPROMISC;
263			pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags);
264		}
265	}
266	p->linktype = DLT_EN10MB;
267	p->selectable_fd = p->fd;
268	p->read_op = pcap_netmap_dispatch;
269	p->inject_op = pcap_netmap_inject;
270	p->setfilter_op = install_bpf_program;
271	p->setdirection_op = NULL;
272	p->set_datalink_op = NULL;
273	p->getnonblock_op = pcap_getnonblock_fd;
274	p->setnonblock_op = pcap_setnonblock_fd;
275	p->stats_op = pcap_netmap_stats;
276	p->cleanup_op = pcap_netmap_close;
277
278	return (0);
279}
280
281
282pcap_t *
283pcap_netmap_create(const char *device, char *ebuf, int *is_ours)
284{
285	pcap_t *p;
286
287	*is_ours = (!strncmp(device, "netmap:", 7) || !strncmp(device, "vale", 4));
288	if (! *is_ours)
289		return NULL;
290	p = pcap_create_common(ebuf, sizeof (struct pcap_netmap));
291	if (p == NULL)
292		return (NULL);
293	p->activate_op = pcap_netmap_activate;
294	return (p);
295}
296
297/*
298 * The "device name" for netmap devices isn't a name for a device, it's
299 * an expression that indicates how the device should be set up, so
300 * there's no way to enumerate them.
301 */
302int
303pcap_netmap_findalldevs(pcap_if_list_t *devlistp _U_, char *err_str _U_)
304{
305	return 0;
306}
307