bpf.c revision 177596
/*-
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/net/bpf.c 177596 2008-03-25 07:41:33Z rwatson $");

#include "opt_bpf.h"
#include "opt_mac.h"
#include "opt_netgraph.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/uio.h>

#include <sys/event.h>
#include <sys/file.h>
#include <sys/poll.h>

#include <sys/socket.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
#include <net/bpf_jitter.h>
#endif
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <net80211/ieee80211_freebsd.h>

#include <security/mac/mac_framework.h>

MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define PRINET  26			/* interruptible */

#define	M_SKIP_BPF	M_SKIP_FIREWALL

/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (e.g., 802.11
 * frames, Ethernet frames, etc.).
 */
static LIST_HEAD(, bpf_if)	bpf_iflist;
static struct mtx	bpf_mtx;		/* bpf global lock */
static int		bpf_bpfd_cnt;

static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static void	bpf_freed(struct bpf_d *);
static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
		    struct sockaddr *, int *, struct bpf_insn *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
		    struct timeval *);
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static void	bpf_drvinit(void *);
static void	bpf_clone(void *, struct ucred *, char *, int, struct cdev **);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
static int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
static int bpf_zerocopy_enable = 0;
SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
    &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");

static	d_open_t	bpfopen;
static	d_close_t	bpfclose;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;
static	d_kqfilter_t	bpfkqfilter;

static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_open =	bpfopen,
	.d_close =	bpfclose,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};

static struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfdetach, filt_bpfread };

/*
 * Wrapper functions for various buffering methods.  If the set of buffer
 * modes expands, we will probably want to introduce a switch data structure
 * similar to protosw, etc.
 */
static void
bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_bytes(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		d->bd_zcopy++;
		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_bytes");
	}
}

static void
bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		d->bd_zcopy++;
		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_mbuf");
	}
}

/*
 * If the buffer mechanism has a way to decide that a held buffer can be made
 * free, then it is exposed via the bpf_canfreebuf() interface.  Returns (1)
 * if the buffer can be discarded, (0) if it cannot.
 */
static int
bpf_canfreebuf(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canfreebuf(d));
	}
	return (0);
}

void
bpf_bufheld(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_bufheld(d);
		break;
	}
}

static void
bpf_free(struct bpf_d *d)
{

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_free(d));

	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_free(d));

	default:
		panic("bpf_buf_free");
	}
}

static int
bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_uiomove(d, buf, len, uio));
}

static int
bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_ioctl_sblen(d, i));
}

static int
bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
}

static int
bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
}

static int
bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
}

/*
 * General BPF functions.
 */
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
    struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
{
	const struct ieee80211_bpf_params *p;
	struct ether_header *eh;
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the Ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_NULL:
		/*
		 * null interface types require a 4-byte pseudo header that
		 * corresponds to the address family of the packet.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * The en ATM driver requires a 4-byte ATM pseudo header.
		 * Though it isn't standard, the vpi:vci needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
		sockp->sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
		sockp->sa_family = AF_IEEE80211;
		sockp->sa_len = 12;	/* XXX != 0 */
		hlen = sizeof(struct ieee80211_bpf_params);
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;

	if (len - hlen > ifp->if_mtu)
		return (EMSGSIZE);

	if ((unsigned)len > MCLBYTES)
		return (EIO);

	if (len > MHLEN) {
		m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR);
	} else {
		MGETHDR(m, M_TRYWAIT, MT_DATA);
	}
	if (m == NULL)
		return (ENOBUFS);
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}

	error = uiomove(mtod(m, u_char *), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
	if (slen == 0) {
		error = EPERM;
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB:
		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
			    ETHER_ADDR_LEN) == 0)
				m->m_flags |= M_BCAST;
			else
				m->m_flags |= M_MCAST;
		}
		break;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (sockp->sa_family == AF_IEEE80211) {
			/*
			 * Collect true length from the parameter header
			 * NB: sockp is known to be zero'd so if we do a
			 *     short copy unspecified parameters will be
			 *     zero.
			 * NB: packet may not be aligned after stripping
			 *     bpf params
			 * XXX check ibp_vers
			 */
			p = mtod(m, const struct ieee80211_bpf_params *);
			hlen = p->ibp_len;
			if (hlen > sizeof(sockp->sa_data)) {
				error = EINVAL;
				goto bad;
			}
		}
		bcopy(m->m_data, sockp->sa_data, hlen);
	}
	*hdrlen = hlen;

	return (0);
bad:
	m_freem(m);
	return (error);
}
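
/*
 * Illustrative userland sketch (an assumption, not part of this file):
 * injecting one frame with write(2), which enters bpfwrite() and then
 * bpf_movein() above.  The interface name and frame contents are made up,
 * and error handling is omitted.
 *
 *	int fd = open("/dev/bpf0", O_RDWR);
 *	struct ifreq ifr;
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	u_int one = 1;
 *	ioctl(fd, BIOCSHDRCMPLT, &one);	-- we supply the link header ourselves
 *	write(fd, frame, framelen);	-- frame: a complete DLT_EN10MB frame
 */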

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	BPFIF_LOCK(bp);
	d->bd_bif = bp;
	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	bpf_bpfd_cnt++;
	BPFIF_UNLOCK(bp);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	int error;
	struct bpf_if *bp;
	struct ifnet *ifp;

	bp = d->bd_bif;
	BPFIF_LOCK(bp);
	BPFD_LOCK(d);
	ifp = d->bd_bif->bif_ifp;

	/*
	 * Remove d from the interface's descriptor list.
	 */
	LIST_REMOVE(d, bd_next);

	bpf_bpfd_cnt--;
	d->bd_bif = NULL;
	BPFD_UNLOCK(d);
	BPFIF_UNLOCK(bp);

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		error = ifpromisc(ifp, 0);
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged.
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
				"bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
}

/*
 * Open the bpf device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
static	int
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d;

	mtx_lock(&bpf_mtx);
	d = dev->si_drv1;
	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 */
	if (d != NULL) {
		mtx_unlock(&bpf_mtx);
		return (EBUSY);
	}
	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
	mtx_unlock(&bpf_mtx);

	if ((dev->si_flags & SI_NAMED) == 0)
		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
		    "bpf%d", dev2unit(dev));
	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	dev->si_drv1 = d;

	/*
	 * For historical reasons, perform a one-time initialization call to
	 * the buffer routines, even though we're not yet committed to a
	 * particular buffer method.
	 */
	bpf_buffer_init(d);
	d->bd_bufmode = BPF_BUFMODE_BUFFER;
	d->bd_sig = SIGIO;
	d->bd_direction = BPF_D_INOUT;
	d->bd_pid = td->td_proc->p_pid;
#ifdef MAC
	mac_bpfdesc_init(d);
	mac_bpfdesc_create(td->td_ucred, d);
#endif
	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init(&d->bd_callout, CALLOUT_MPSAFE);
	knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static	int
bpfclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d = dev->si_drv1;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	mtx_lock(&bpf_mtx);
	if (d->bd_bif)
		bpf_detachd(d);
	mtx_unlock(&bpf_mtx);
	selwakeuppri(&d->bd_sel, PRINET);
#ifdef MAC
	mac_bpfdesc_destroy(d);
#endif /* MAC */
	knlist_destroy(&d->bd_sel.si_note);
	bpf_freed(d);
	dev->si_drv1 = NULL;
	free(d, M_BPF);

	return (0);
}

/*
 *  bpfread - read next chunk of packets from buffers
 */
static	int
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d = dev->si_drv1;
	int timed_out;
	int error;

	/*
	 * Restrict the application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	BPFD_LOCK(d);
	d->bd_pid = curthread->td_proc->p_pid;
	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
		BPFD_UNLOCK(d);
		return (EOPNOTSUPP);
	}
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * One or more packets arrived either since the
			 * previous read or while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (ioflag & O_NONBLOCK) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
		     "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 *
	 * XXXRW: More synchronization needed here: what if a second thread
	 * issues a read on the same fd at the same time?  Don't want this
	 * getting invalidated.
	 */
	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	BPFD_UNLOCK(d);

	return (error);
}
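
/*
 * Illustrative userland sketch of consuming the buffer returned by read(2)
 * above: the buffer holds zero or more records, each a struct bpf_hdr
 * followed by captured bytes, padded so the next header is
 * BPF_WORDALIGN()ed.  The read size must equal the value reported by
 * BIOCGBLEN.  process() stands in for application code; error handling is
 * omitted.
 *
 *	u_int blen;
 *	ioctl(fd, BIOCGBLEN, &blen);
 *	char *buf = malloc(blen);
 *	ssize_t n = read(fd, buf, blen);
 *	char *p = buf;
 *	while (p < buf + n) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)p;
 *		process(p + bh->bh_hdrlen, bh->bh_caplen);
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */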

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup(d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeuppri(&d->bd_sel, PRINET);
	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
}

static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	BPFD_UNLOCK(d);
}

static int
bpf_ready(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
		return (1);
	if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
	    d->bd_slen != 0)
		return (1);
	return (0);
}

static int
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d = dev->si_drv1;
	struct ifnet *ifp;
	struct mbuf *m, *mc;
	struct sockaddr dst;
	int error, hlen;

	d->bd_pid = curthread->td_proc->p_pid;
	d->bd_wcount++;
	if (d->bd_bif == NULL) {
		d->bd_wdcount++;
		return (ENXIO);
	}

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0) {
		d->bd_wdcount++;
		return (ENETDOWN);
	}

	if (uio->uio_resid == 0) {
		d->bd_wdcount++;
		return (0);
	}

	bzero(&dst, sizeof(dst));
	m = NULL;
	hlen = 0;
	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
	    &m, &dst, &hlen, d->bd_wfilter);
	if (error) {
		d->bd_wdcount++;
		return (error);
	}
	d->bd_wfcount++;
	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

	if (d->bd_feedback) {
		mc = m_dup(m, M_DONTWAIT);
		if (mc != NULL)
			mc->m_pkthdr.rcvif = ifp;
		/* XXX Do not return the same packet twice. */
		if (d->bd_direction == BPF_D_INOUT)
			m->m_flags |= M_SKIP_BPF;
	} else
		mc = NULL;

	m->m_pkthdr.len -= hlen;
	m->m_len -= hlen;
	m->m_data += hlen;	/* XXX */

#ifdef MAC
	BPFD_LOCK(d);
	mac_bpfdesc_create_mbuf(d, m);
	if (mc != NULL)
		mac_bpfdesc_create_mbuf(d, mc);
	BPFD_UNLOCK(d);
#endif

	error = (*ifp->if_output)(ifp, m, &dst, NULL);
	if (error)
		d->bd_wdcount++;

	if (mc != NULL) {
		if (error == 0)
			(*ifp->if_input)(ifp, mc);
		else
			m_freem(mc);
	}

	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
static void
reset_d(struct bpf_d *d)
{

	mtx_assert(&d->bd_mtx, MA_OWNED);
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_fcount = 0;
	d->bd_wcount = 0;
	d->bd_wfcount = 0;
	d->bd_wdcount = 0;
	d->bd_zcopy = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set link layer read filter.
 *  BIOCSETWF		Set link layer write filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag.
 *  BIOCSHDRCMPLT	Set "header already complete" flag.
 *  BIOCGDIRECTION	Get packet direction flag.
 *  BIOCSDIRECTION	Set packet direction flag.
 *  BIOCLOCK		Set "locked" flag.
 *  BIOCFEEDBACK	Set packet feedback mode.
 *  BIOCSETZBUF		Set current zero-copy buffer locations.
 *  BIOCGETZMAX		Get maximum zero-copy buffer size.
 *  BIOCROTZBUF		Force rotation of zero-copy buffer.
 *  BIOCSETBUFMODE	Set buffer mode.
 *  BIOCGETBUFMODE	Get current buffer mode.
 */
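
/*
 * Illustrative userland sketch of a typical capture setup using the ioctls
 * above (the interface name is an assumption; error handling is omitted):
 *
 *	int fd = open("/dev/bpf0", O_RDONLY);
 *	struct ifreq ifr;
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);	-- bind to the interface
 *	u_int one = 1;
 *	ioctl(fd, BIOCIMMEDIATE, &one);	-- return packets as they arrive
 *	ioctl(fd, BIOCPROMISC, NULL);	-- optional: promiscuous mode
 */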
/* ARGSUSED */
static	int
bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
    struct thread *td)
{
	struct bpf_d *d = dev->si_drv1;
	int error = 0;

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	if (d->bd_locked == 1) {
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case BIOCFEEDBACK:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCROTZBUF:
			break;
		default:
			return (EPERM);
		}
	}
	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	case SIOCGIFADDR:
		{
			struct ifnet *ifp;

			if (d->bd_bif == NULL)
				error = EINVAL;
			else {
				ifp = d->bd_bif->bif_ifp;
				error = (*ifp->if_ioctl)(ifp, cmd, addr);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		error = bpf_ioctl_sblen(d, (u_int *)addr);
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;

	/*
	 * Get current data link type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			strlcpy(ifr->ifr_name, ifp->if_xname,
			    sizeof(ifr->ifr_name));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			/* XXXCSJP overflow */
			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag.
	 */
	case BIOCGHDRCMPLT:
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	/*
	 * Set "header already complete" flag.
	 */
	case BIOCSHDRCMPLT:
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get packet direction flag.
	 */
	case BIOCGDIRECTION:
		*(u_int *)addr = d->bd_direction;
		break;

	/*
	 * Set packet direction flag.
	 */
	case BIOCSDIRECTION:
		{
			u_int	direction;

			direction = *(u_int *)addr;
			switch (direction) {
			case BPF_D_IN:
			case BPF_D_INOUT:
			case BPF_D_OUT:
				d->bd_direction = direction;
				break;
			default:
				error = EINVAL;
			}
		}
		break;

	case BIOCFEEDBACK:
		d->bd_feedback = *(u_int *)addr;
		break;

	case BIOCLOCK:
		d->bd_locked = 1;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(&d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(&d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;

	case BIOCGETBUFMODE:
		*(u_int *)addr = d->bd_bufmode;
		break;

	case BIOCSETBUFMODE:
		/*
		 * Allow the buffering mode to be changed as long as we
		 * haven't yet committed to a particular mode.  Our
		 * definition of commitment, for now, is whether or not a
		 * buffer has been allocated or an interface attached, since
		 * that's the point where things get tricky.
		 */
		switch (*(u_int *)addr) {
		case BPF_BUFMODE_BUFFER:
			break;

		case BPF_BUFMODE_ZBUF:
			if (bpf_zerocopy_enable)
				break;
			/* FALLTHROUGH */

		default:
			return (EINVAL);
		}

		BPFD_LOCK(d);
		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
			BPFD_UNLOCK(d);
			return (EBUSY);
		}
		d->bd_bufmode = *(u_int *)addr;
		BPFD_UNLOCK(d);
		break;

	case BIOCGETZMAX:
		return (bpf_ioctl_getzmax(td, d, (size_t *)addr));

	case BIOCSETZBUF:
		return (bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr));

	case BIOCROTZBUF:
		return (bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr));
	}
	return (error);
}
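
/*
 * Illustrative userland sketch of switching a descriptor to zero-copy
 * buffering through the ioctls above (a sketch under the assumption that
 * net.bpf.zerocopy_enable is set; buffer management is simplified and
 * error handling omitted):
 *
 *	u_int mode = BPF_BUFMODE_ZBUF;
 *	ioctl(fd, BIOCSETBUFMODE, &mode);
 *	size_t zmax;
 *	ioctl(fd, BIOCGETZMAX, &zmax);
 *	struct bpf_zbuf bz;
 *	bz.bz_bufa = mmap(NULL, zmax, PROT_READ | PROT_WRITE,
 *	    MAP_ANON, -1, 0);
 *	bz.bz_bufb = mmap(NULL, zmax, PROT_READ | PROT_WRITE,
 *	    MAP_ANON, -1, 0);
 *	bz.bz_buflen = zmax;
 *	ioctl(fd, BIOCSETZBUF, &bz);	-- must precede BIOCSETIF; see
 *					   bpf_setif() below
 */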

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
	struct bpf_insn *fcode, *old;
	u_int wfilter, flen, size;
#ifdef BPF_JITTER
	bpf_jit_filter *ofunc;
#endif

	if (cmd == BIOCSETWF) {
		old = d->bd_wfilter;
		wfilter = 1;
#ifdef BPF_JITTER
		ofunc = NULL;
#endif
	} else {
		wfilter = 0;
		old = d->bd_rfilter;
#ifdef BPF_JITTER
		ofunc = d->bd_bfilter;
#endif
	}
	if (fp->bf_insns == NULL) {
		if (fp->bf_len != 0)
			return (EINVAL);
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = NULL;
		else {
			d->bd_rfilter = NULL;
#ifdef BPF_JITTER
			d->bd_bfilter = NULL;
#endif
		}
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif
		return (0);
	}
	flen = fp->bf_len;
	if (flen > bpf_maxinsns)
		return (EINVAL);

	size = flen * sizeof(*fp->bf_insns);
	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = fcode;
		else {
			d->bd_rfilter = fcode;
#ifdef BPF_JITTER
			d->bd_bfilter = bpf_jitter(fcode, flen);
#endif
		}
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif

		return (0);
	}
	free((caddr_t)fcode, M_BPF);
	return (EINVAL);
}
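
/*
 * Illustrative userland sketch: the simplest program accepted by bpf_setf()
 * above is a single BPF_RET instruction; the returned constant is the
 * snapshot length (0 would reject every packet).  The value 96 is
 * arbitrary.
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET + BPF_K, 96),
 *	};
 *	struct bpf_program prog = { 1, insns };
 *	ioctl(fd, BIOCSETF, &prog);	-- BIOCSETWF for the write filter
 */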

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	struct ifnet *theywant;

	theywant = ifunit(ifr->ifr_name);
	if (theywant == NULL || theywant->if_bpf == NULL)
		return (ENXIO);

	bp = theywant->if_bpf;

	/*
	 * Behavior here depends on the buffering model.  If we're using
	 * kernel memory buffers, then we can allocate them here.  If we're
	 * using zero-copy, then the user process must have registered
	 * buffers by the time we get here.  If not, return an error.
	 *
	 * XXXRW: There are locking issues here with multi-threaded use: what
	 * if two threads try to set the interface at once?
	 */
	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		if (d->bd_sbuf == NULL)
			bpf_buffer_alloc(d);
		KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
		break;

	case BPF_BUFMODE_ZBUF:
		if (d->bd_sbuf == NULL)
			return (EINVAL);
		break;

	default:
		panic("bpf_setif: bufmode %d", d->bd_bufmode);
	}
	if (bp != d->bd_bif) {
		if (d->bd_bif)
			/*
			 * Detach if attached to something else.
			 */
			bpf_detachd(d);

		bpf_attachd(d, bp);
	}
	BPFD_LOCK(d);
	reset_d(d);
	BPFD_UNLOCK(d);
	return (0);
}

/*
 * Support for select() and poll() system calls
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
static int
bpfpoll(struct cdev *dev, int events, struct thread *td)
{
	struct bpf_d *d;
	int revents;

	d = dev->si_drv1;
	if (d->bd_bif == NULL)
		return (ENXIO);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (events & (POLLIN | POLLRDNORM)) {
		if (bpf_ready(d))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}

/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 */
int
bpfkqfilter(struct cdev *dev, struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)dev->si_drv1;

	if (kn->kn_filter != EVFILT_READ)
		return (1);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = curthread->td_proc->p_pid;
	kn->kn_fop = &bpfread_filtops;
	kn->kn_hook = d;
	knlist_add(&d->bd_sel.si_note, kn, 1);
	BPFD_UNLOCK(d);

	return (0);
}

static void
filt_bpfdetach(struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	knlist_remove(&d->bd_sel.si_note, kn, 0);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int ready;

	BPFD_LOCK_ASSERT(d);
	ready = bpf_ready(d);
	if (ready) {
		kn->kn_data = d->bd_slen;
		if (d->bd_hbuf)
			kn->kn_data += d->bd_hlen;
	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		callout_reset(&d->bd_callout, d->bd_rtout,
		    bpf_timed_out, d);
		d->bd_state = BPF_WAITING;
	}

	return (ready);
}
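
/*
 * Illustrative userland sketch: waiting for readability with kevent(2),
 * which registers the knote through bpfkqfilter() above.  On return,
 * kev.data carries the byte count computed in filt_bpfread().
 *
 *	int kq = kqueue();
 *	struct kevent kev;
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);	-- register
 *	kevent(kq, NULL, 0, &kev, 1, NULL);	-- wait for one event
 */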

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct bpf_d *d;
	u_int slen;
	int gottime;
	struct timeval tv;

	gottime = 0;
	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL)
			slen = (*(d->bd_bfilter->func))(pkt, pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, pkt, pktlen, slen,
				    bpf_append_bytes, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

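/*
 * BPF_CHECK_DIRECTION expands to a bare "if" whose body is the statement
 * that follows each use (a "continue" below): BPF_D_IN skips packets with
 * no receiving interface (i.e., locally generated ones), and BPF_D_OUT
 * skips packets that have one (i.e., received ones).
 */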
#define	BPF_CHECK_DIRECTION(d, m) \
	if (((d)->bd_direction == BPF_D_IN && (m)->m_pkthdr.rcvif == NULL) || \
	    ((d)->bd_direction == BPF_D_OUT && (m)->m_pkthdr.rcvif != NULL))

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;
	struct timeval tv;

	if (m->m_flags & M_SKIP_BPF) {
		m->m_flags &= ~M_SKIP_BPF;
		return;
	}

	gottime = 0;

	pktlen = m_length(m, NULL);

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPF_CHECK_DIRECTION(d, m)
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		/* XXX We cannot handle multiple mbufs. */
		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL &&
		    m->m_next == NULL)
			slen = (*(d->bd_bfilter->func))(mtod(m, u_char *),
			    pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_append_mbuf, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Incoming linkage from device drivers, when packet is in
 * an mbuf chain and to be prepended by a contiguous header.
 */
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;
	struct timeval tv;

	if (m->m_flags & M_SKIP_BPF) {
		m->m_flags &= ~M_SKIP_BPF;
		return;
	}

	gottime = 0;

	pktlen = m_length(m, NULL);
	/*
	 * Craft on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only set up what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPF_CHECK_DIRECTION(d, m)
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_append_mbuf, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

#undef	BPF_CHECK_DIRECTION

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_append_mbuf is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
    struct timeval *tv)
{
	struct bpf_hdr hdr;
	int totlen, curlen;
	int hdrlen = d->bd_bif->bif_hdrlen;
	int do_wakeup = 0;

	BPFD_LOCK_ASSERT(d);

	/*
	 * Detect whether user space has released a buffer back to us, and if
	 * so, move it from being a hold buffer to a free buffer.  This may
	 * not be the best place to do it (for example, we might only want to
	 * run this check if we need the space), but for now it's a reliable
	 * spot to do it.
	 */
	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
	}

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has already
		 * expired during a select call.  A packet arrived, so the
		 * reader should be woken up.
		 */
		do_wakeup = 1;

	/*
	 * Append the bpf header.  Note we append the actual header size, but
	 * move forward the length of the header plus padding.
	 */
	bzero(&hdr, sizeof(hdr));
	hdr.bh_tstamp = *tv;
	hdr.bh_datalen = pktlen;
	hdr.bh_hdrlen = hdrlen;
	hdr.bh_caplen = totlen - hdrlen;
	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen);
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{

	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it hasn't yet been marked
	 * free.
	 */
	bpf_free(d);
	if (d->bd_rfilter) {
		free((caddr_t)d->bd_rfilter, M_BPF);
#ifdef BPF_JITTER
		bpf_destroy_jit_filter(d->bd_bfilter);
#endif
	}
	if (d->bd_wfilter)
		free((caddr_t)d->bd_wfilter, M_BPF);
	mtx_destroy(&d->bd_mtx);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
 */
void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;

	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
	if (bp == NULL)
		panic("bpfattach");

	LIST_INIT(&bp->bif_dlist);
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
	*driverp = bp;

	mtx_lock(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
	mtx_unlock(&bpf_mtx);

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
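
	/*
	 * Worked example (an assumption; both constants vary by ABI): with
	 * SIZEOF_BPF_HDR == 18 and 4-byte BPF_WORDALIGN, an Ethernet
	 * attachment with hdrlen == 14 yields BPF_WORDALIGN(14 + 18) - 14 ==
	 * 32 - 14 == 18, so bpf header plus link header span 32 bytes and
	 * the network header starts on a longword boundary.
	 */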

	if (bootverbose)
		if_printf(ifp, "bpf attached\n");
}

/*
 * Detach bpf from an interface.  This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL.  Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if	*bp;
	struct bpf_d	*d;

	/* Locate BPF interface information */
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (ifp == bp->bif_ifp)
			break;
	}

	/* Interface wasn't attached */
	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
		mtx_unlock(&bpf_mtx);
		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
		return;
	}

	LIST_REMOVE(bp, bif_next);
	mtx_unlock(&bpf_mtx);

	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
		bpf_detachd(d);
		BPFD_LOCK(d);
		bpf_wakeup(d);
		BPFD_UNLOCK(d);
	}

	mtx_destroy(&bp->bif_mtx);
	free(bp, M_BPF);
}

/*
 * Get a list of the available data link types of the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len) {
				mtx_unlock(&bpf_mtx);
				return (ENOMEM);
			}
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	mtx_unlock(&bpf_mtx);
	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	mtx_unlock(&bpf_mtx);
	if (bp != NULL) {
		opromisc = d->bd_promisc;
		bpf_detachd(d);
		bpf_attachd(d, bp);
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		if (opromisc) {
			error = ifpromisc(bp->bif_ifp, 1);
			if (error)
				if_printf(bp->bif_ifp,
					"bpf_setdlt: ifpromisc failed (%d)\n",
					error);
			else
				d->bd_promisc = 1;
		}
	}
	return (bp == NULL ? EINVAL : 0);
}

static void
bpf_clone(void *arg, struct ucred *cred, char *name, int namelen,
    struct cdev **dev)
{
	int u;

	if (*dev != NULL)
		return;
	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
		return;
	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", u);
	dev_ref(*dev);
	(*dev)->si_flags |= SI_CHEAPCLONE;
}

static void
bpf_drvinit(void *unused)
{

	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
	LIST_INIT(&bpf_iflist);
	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
}

static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{

	bzero(d, sizeof(*d));
	BPFD_LOCK_ASSERT(bd);
	d->bd_structsize = sizeof(*d);
	d->bd_immediate = bd->bd_immediate;
	d->bd_promisc = bd->bd_promisc;
	d->bd_hdrcmplt = bd->bd_hdrcmplt;
	d->bd_direction = bd->bd_direction;
	d->bd_feedback = bd->bd_feedback;
	d->bd_async = bd->bd_async;
	d->bd_rcount = bd->bd_rcount;
	d->bd_dcount = bd->bd_dcount;
	d->bd_fcount = bd->bd_fcount;
	d->bd_sig = bd->bd_sig;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname,
	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
	d->bd_locked = bd->bd_locked;
	d->bd_wcount = bd->bd_wcount;
	d->bd_wdcount = bd->bd_wdcount;
	d->bd_wfcount = bd->bd_wfcount;
	d->bd_zcopy = bd->bd_zcopy;
	d->bd_bufmode = bd->bd_bufmode;
}

static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct xbpf_d *xbdbuf, *xbd;
	int index, error;
	struct bpf_if *bp;
	struct bpf_d *bd;

	/*
	 * XXX This is not technically correct.  It is possible for
	 * non-privileged users to open bpf devices.  It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = priv_check(req->td, PRIV_NET_BPF);
	if (error)
		return (error);
	if (req->oldptr == NULL)
		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
	if (bpf_bpfd_cnt == 0)
		return (SYSCTL_OUT(req, 0, 0));
	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
	mtx_lock(&bpf_mtx);
	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
		mtx_unlock(&bpf_mtx);
		free(xbdbuf, M_BPF);
		return (ENOMEM);
	}
	index = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_LOCK(bp);
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
		BPFIF_UNLOCK(bp);
	}
	mtx_unlock(&bpf_mtx);
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}

SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL);

#else /* !DEV_BPF && !NETGRAPH_BPF */
/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */
static struct bpf_if bp_null;

void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
}

void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}

void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
}

void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{

	*driverp = &bp_null;
}

void
bpfdetach(struct ifnet *ifp)
{
}

u_int
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
{
	return -1;	/* "no filter" behaviour */
}

int
bpf_validate(const struct bpf_insn *f, int len)
{
	return 0;		/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */