1335640Shselasky/*
2335640Shselasky * Copyright (C) 2014 Luigi Rizzo. All rights reserved.
3335640Shselasky *
4335640Shselasky * Redistribution and use in source and binary forms, with or without
5335640Shselasky * modification, are permitted provided that the following conditions
6335640Shselasky * are met:
7335640Shselasky *
8335640Shselasky *   1. Redistributions of source code must retain the above copyright
9335640Shselasky *      notice, this list of conditions and the following disclaimer.
10335640Shselasky *   2. Redistributions in binary form must reproduce the above copyright
11335640Shselasky *      notice, this list of conditions and the following disclaimer in the
12335640Shselasky *      documentation and/or other materials provided with the distribution.
13335640Shselasky *
14335640Shselasky * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''AND
15335640Shselasky * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16335640Shselasky * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17335640Shselasky * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18335640Shselasky * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19335640Shselasky * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20335640Shselasky * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21335640Shselasky * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22335640Shselasky * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23335640Shselasky * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24335640Shselasky * SUCH DAMAGE.
25335640Shselasky */
26335640Shselasky
27335640Shselasky#ifdef HAVE_CONFIG_H
28335640Shselasky#include <config.h>
29335640Shselasky#endif
30335640Shselasky
31335640Shselasky#include <poll.h>
32335640Shselasky#include <ctype.h>
33335640Shselasky#include <errno.h>
34335640Shselasky#include <netdb.h>
35335640Shselasky#include <stdio.h>
36335640Shselasky#include <stdlib.h>
37335640Shselasky#include <string.h>
38335640Shselasky#include <unistd.h>
39335640Shselasky
40335640Shselasky#define NETMAP_WITH_LIBS
41335640Shselasky#include <net/netmap_user.h>
42335640Shselasky
43335640Shselasky#include "pcap-int.h"
44335640Shselasky#include "pcap-netmap.h"
45335640Shselasky
/*
 * FreeBSD has a dedicated IFF_PPROMISC flag, which lives in
 * ifr_flagshigh.  Everywhere else, fall back to plain IFF_PROMISC
 * under the same name.
 *
 * XXX - DragonFly BSD?
 */
#if !defined(__FreeBSD__)
#define IFF_PPROMISC	IFF_PROMISC
#endif /* !__FreeBSD__ */
55335640Shselasky
56335640Shselaskystruct pcap_netmap {
57335640Shselasky	struct nm_desc *d;	/* pointer returned by nm_open() */
58335640Shselasky	pcap_handler cb;	/* callback and argument */
59335640Shselasky	u_char *cb_arg;
60335640Shselasky	int must_clear_promisc;	/* flag */
61335640Shselasky	uint64_t rx_pkts;	/* # of pkts received before the filter */
62335640Shselasky};
63335640Shselasky
64335640Shselasky
65335640Shselaskystatic int
66335640Shselaskypcap_netmap_stats(pcap_t *p, struct pcap_stat *ps)
67335640Shselasky{
68335640Shselasky	struct pcap_netmap *pn = p->priv;
69335640Shselasky
70356341Scy	ps->ps_recv = (u_int)pn->rx_pkts;
71335640Shselasky	ps->ps_drop = 0;
72335640Shselasky	ps->ps_ifdrop = 0;
73335640Shselasky	return 0;
74335640Shselasky}
75335640Shselasky
76335640Shselasky
77335640Shselaskystatic void
78335640Shselaskypcap_netmap_filter(u_char *arg, struct pcap_pkthdr *h, const u_char *buf)
79335640Shselasky{
80335640Shselasky	pcap_t *p = (pcap_t *)arg;
81335640Shselasky	struct pcap_netmap *pn = p->priv;
82335640Shselasky	const struct bpf_insn *pc = p->fcode.bf_insns;
83335640Shselasky
84335640Shselasky	++pn->rx_pkts;
85335640Shselasky	if (pc == NULL || bpf_filter(pc, buf, h->len, h->caplen))
86335640Shselasky		pn->cb(pn->cb_arg, h, buf);
87335640Shselasky}
88335640Shselasky
89335640Shselasky
90335640Shselaskystatic int
91335640Shselaskypcap_netmap_dispatch(pcap_t *p, int cnt, pcap_handler cb, u_char *user)
92335640Shselasky{
93335640Shselasky	int ret;
94335640Shselasky	struct pcap_netmap *pn = p->priv;
95335640Shselasky	struct nm_desc *d = pn->d;
96335640Shselasky	struct pollfd pfd = { .fd = p->fd, .events = POLLIN, .revents = 0 };
97335640Shselasky
98335640Shselasky	pn->cb = cb;
99335640Shselasky	pn->cb_arg = user;
100335640Shselasky
101335640Shselasky	for (;;) {
102335640Shselasky		if (p->break_loop) {
103335640Shselasky			p->break_loop = 0;
104335640Shselasky			return PCAP_ERROR_BREAK;
105335640Shselasky		}
106335640Shselasky		/* nm_dispatch won't run forever */
107335640Shselasky
108335640Shselasky		ret = nm_dispatch((void *)d, cnt, (void *)pcap_netmap_filter, (void *)p);
109335640Shselasky		if (ret != 0)
110335640Shselasky			break;
111335640Shselasky		errno = 0;
112335640Shselasky		ret = poll(&pfd, 1, p->opt.timeout);
113335640Shselasky	}
114335640Shselasky	return ret;
115335640Shselasky}
116335640Shselasky
117335640Shselasky
118335640Shselasky/* XXX need to check the NIOCTXSYNC/poll */
119335640Shselaskystatic int
120335640Shselaskypcap_netmap_inject(pcap_t *p, const void *buf, size_t size)
121335640Shselasky{
122335640Shselasky	struct pcap_netmap *pn = p->priv;
123335640Shselasky	struct nm_desc *d = pn->d;
124335640Shselasky
125335640Shselasky	return nm_inject(d, buf, size);
126335640Shselasky}
127335640Shselasky
128335640Shselasky
129335640Shselaskystatic int
130335640Shselaskypcap_netmap_ioctl(pcap_t *p, u_long what, uint32_t *if_flags)
131335640Shselasky{
132335640Shselasky	struct pcap_netmap *pn = p->priv;
133335640Shselasky	struct nm_desc *d = pn->d;
134335640Shselasky	struct ifreq ifr;
135335640Shselasky	int error, fd = d->fd;
136335640Shselasky
137335640Shselasky#ifdef linux
138335640Shselasky	fd = socket(AF_INET, SOCK_DGRAM, 0);
139335640Shselasky	if (fd < 0) {
140335640Shselasky		fprintf(stderr, "Error: cannot get device control socket.\n");
141335640Shselasky		return -1;
142335640Shselasky	}
143335640Shselasky#endif /* linux */
144335640Shselasky	bzero(&ifr, sizeof(ifr));
145335640Shselasky	strncpy(ifr.ifr_name, d->req.nr_name, sizeof(ifr.ifr_name));
146335640Shselasky	switch (what) {
147335640Shselasky	case SIOCSIFFLAGS:
148335640Shselasky		/*
149335640Shselasky		 * The flags we pass in are 32-bit and unsigned.
150335640Shselasky		 *
151335640Shselasky		 * On most if not all UN*Xes, ifr_flags is 16-bit and
152335640Shselasky		 * signed, and the result of assigning a longer
153335640Shselasky		 * unsigned value to a shorter signed value is
154335640Shselasky		 * implementation-defined (even if, in practice, it'll
155335640Shselasky		 * do what's intended on all platforms we support
156335640Shselasky		 * result of assigning a 32-bit unsigned value).
157335640Shselasky		 * So we mask out the upper 16 bits.
158335640Shselasky		 */
159335640Shselasky		ifr.ifr_flags = *if_flags & 0xffff;
160335640Shselasky#ifdef __FreeBSD__
161335640Shselasky		/*
162335640Shselasky		 * In FreeBSD, we need to set the high-order flags,
163335640Shselasky		 * as we're using IFF_PPROMISC, which is in those bits.
164335640Shselasky		 *
165335640Shselasky		 * XXX - DragonFly BSD?
166335640Shselasky		 */
167335640Shselasky		ifr.ifr_flagshigh = *if_flags >> 16;
168335640Shselasky#endif /* __FreeBSD__ */
169335640Shselasky		break;
170335640Shselasky	}
171335640Shselasky	error = ioctl(fd, what, &ifr);
172335640Shselasky	if (!error) {
173335640Shselasky		switch (what) {
174335640Shselasky		case SIOCGIFFLAGS:
175335640Shselasky			/*
176335640Shselasky			 * The flags we return are 32-bit.
177335640Shselasky			 *
178335640Shselasky			 * On most if not all UN*Xes, ifr_flags is
179335640Shselasky			 * 16-bit and signed, and will get sign-
180335640Shselasky			 * extended, so that the upper 16 bits of
181335640Shselasky			 * those flags will be forced on.  So we
182335640Shselasky			 * mask out the upper 16 bits of the
183335640Shselasky			 * sign-extended value.
184335640Shselasky			 */
185335640Shselasky			*if_flags = ifr.ifr_flags & 0xffff;
186335640Shselasky#ifdef __FreeBSD__
187335640Shselasky			/*
188335640Shselasky			 * In FreeBSD, we need to return the
189335640Shselasky			 * high-order flags, as we're using
190335640Shselasky			 * IFF_PPROMISC, which is in those bits.
191335640Shselasky			 *
192335640Shselasky			 * XXX - DragonFly BSD?
193335640Shselasky			 */
194335640Shselasky			*if_flags |= (ifr.ifr_flagshigh << 16);
195335640Shselasky#endif /* __FreeBSD__ */
196335640Shselasky		}
197335640Shselasky	}
198335640Shselasky#ifdef linux
199335640Shselasky	close(fd);
200335640Shselasky#endif /* linux */
201335640Shselasky	return error ? -1 : 0;
202335640Shselasky}
203335640Shselasky
204335640Shselasky
205335640Shselaskystatic void
206335640Shselaskypcap_netmap_close(pcap_t *p)
207335640Shselasky{
208335640Shselasky	struct pcap_netmap *pn = p->priv;
209335640Shselasky	struct nm_desc *d = pn->d;
210335640Shselasky	uint32_t if_flags = 0;
211335640Shselasky
212335640Shselasky	if (pn->must_clear_promisc) {
213335640Shselasky		pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */
214335640Shselasky		if (if_flags & IFF_PPROMISC) {
215335640Shselasky			if_flags &= ~IFF_PPROMISC;
216335640Shselasky			pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags);
217335640Shselasky		}
218335640Shselasky	}
219335640Shselasky	nm_close(d);
220335640Shselasky	pcap_cleanup_live_common(p);
221335640Shselasky}
222335640Shselasky
223335640Shselasky
224335640Shselaskystatic int
225335640Shselaskypcap_netmap_activate(pcap_t *p)
226335640Shselasky{
227335640Shselasky	struct pcap_netmap *pn = p->priv;
228335640Shselasky	struct nm_desc *d;
229335640Shselasky	uint32_t if_flags = 0;
230335640Shselasky
231335640Shselasky	d = nm_open(p->opt.device, NULL, 0, NULL);
232335640Shselasky	if (d == NULL) {
233335640Shselasky		pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
234335640Shselasky		    errno, "netmap open: cannot access %s",
235335640Shselasky		    p->opt.device);
236335640Shselasky		pcap_cleanup_live_common(p);
237335640Shselasky		return (PCAP_ERROR);
238335640Shselasky	}
239335640Shselasky#if 0
240335640Shselasky	fprintf(stderr, "%s device %s priv %p fd %d ports %d..%d\n",
241335640Shselasky	    __FUNCTION__, p->opt.device, d, d->fd,
242335640Shselasky	    d->first_rx_ring, d->last_rx_ring);
243335640Shselasky#endif
244335640Shselasky	pn->d = d;
245335640Shselasky	p->fd = d->fd;
246335640Shselasky
247335640Shselasky	/*
248335640Shselasky	 * Turn a negative snapshot value (invalid), a snapshot value of
249335640Shselasky	 * 0 (unspecified), or a value bigger than the normal maximum
250335640Shselasky	 * value, into the maximum allowed value.
251335640Shselasky	 *
252335640Shselasky	 * If some application really *needs* a bigger snapshot
253335640Shselasky	 * length, we should just increase MAXIMUM_SNAPLEN.
254335640Shselasky	 */
255335640Shselasky	if (p->snapshot <= 0 || p->snapshot > MAXIMUM_SNAPLEN)
256335640Shselasky		p->snapshot = MAXIMUM_SNAPLEN;
257335640Shselasky
258335640Shselasky	if (p->opt.promisc && !(d->req.nr_ringid & NETMAP_SW_RING)) {
259335640Shselasky		pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */
260335640Shselasky		if (!(if_flags & IFF_PPROMISC)) {
261335640Shselasky			pn->must_clear_promisc = 1;
262335640Shselasky			if_flags |= IFF_PPROMISC;
263335640Shselasky			pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags);
264335640Shselasky		}
265335640Shselasky	}
266335640Shselasky	p->linktype = DLT_EN10MB;
267335640Shselasky	p->selectable_fd = p->fd;
268335640Shselasky	p->read_op = pcap_netmap_dispatch;
269335640Shselasky	p->inject_op = pcap_netmap_inject;
270335640Shselasky	p->setfilter_op = install_bpf_program;
271335640Shselasky	p->setdirection_op = NULL;
272335640Shselasky	p->set_datalink_op = NULL;
273335640Shselasky	p->getnonblock_op = pcap_getnonblock_fd;
274335640Shselasky	p->setnonblock_op = pcap_setnonblock_fd;
275335640Shselasky	p->stats_op = pcap_netmap_stats;
276335640Shselasky	p->cleanup_op = pcap_netmap_close;
277335640Shselasky
278335640Shselasky	return (0);
279335640Shselasky}
280335640Shselasky
281335640Shselasky
282335640Shselaskypcap_t *
283335640Shselaskypcap_netmap_create(const char *device, char *ebuf, int *is_ours)
284335640Shselasky{
285335640Shselasky	pcap_t *p;
286335640Shselasky
287335640Shselasky	*is_ours = (!strncmp(device, "netmap:", 7) || !strncmp(device, "vale", 4));
288335640Shselasky	if (! *is_ours)
289335640Shselasky		return NULL;
290335640Shselasky	p = pcap_create_common(ebuf, sizeof (struct pcap_netmap));
291335640Shselasky	if (p == NULL)
292335640Shselasky		return (NULL);
293335640Shselasky	p->activate_op = pcap_netmap_activate;
294335640Shselasky	return (p);
295335640Shselasky}
296335640Shselasky
297335640Shselasky/*
298335640Shselasky * The "device name" for netmap devices isn't a name for a device, it's
299335640Shselasky * an expression that indicates how the device should be set up, so
300335640Shselasky * there's no way to enumerate them.
301335640Shselasky */
302335640Shselaskyint
303335640Shselaskypcap_netmap_findalldevs(pcap_if_list_t *devlistp _U_, char *err_str _U_)
304335640Shselasky{
305335640Shselasky	return 0;
306335640Shselasky}
307