bpf.c revision 196150
1/*-
2 * Copyright (c) 1990, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from the Stanford/CMU enet packet filter,
6 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
7 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
8 * Berkeley Laboratory.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: head/sys/net/bpf.c 196150 2009-08-12 17:28:53Z jkim $");
39
40#include "opt_bpf.h"
41#include "opt_netgraph.h"
42
43#include <sys/types.h>
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/conf.h>
47#include <sys/fcntl.h>
48#include <sys/jail.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/time.h>
52#include <sys/priv.h>
53#include <sys/proc.h>
54#include <sys/signalvar.h>
55#include <sys/filio.h>
56#include <sys/sockio.h>
57#include <sys/ttycom.h>
58#include <sys/uio.h>
59
60#include <sys/event.h>
61#include <sys/file.h>
62#include <sys/poll.h>
63#include <sys/proc.h>
64
65#include <sys/socket.h>
66
67#include <net/if.h>
68#include <net/bpf.h>
69#include <net/bpf_buffer.h>
70#ifdef BPF_JITTER
71#include <net/bpf_jitter.h>
72#endif
73#include <net/bpf_zerocopy.h>
74#include <net/bpfdesc.h>
75#include <net/vnet.h>
76
77#include <netinet/in.h>
78#include <netinet/if_ether.h>
79#include <sys/kernel.h>
80#include <sys/sysctl.h>
81
82#include <net80211/ieee80211_freebsd.h>
83
84#include <security/mac/mac_framework.h>
85
86MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
87
88#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
89
90#define PRINET  26			/* interruptible */
91
92/*
93 * bpf_iflist is a list of BPF interface structures, each corresponding to a
94 * specific DLT.  The same network interface might have several BPF interface
95 * structures registered by different layers in the stack (i.e., 802.11
96 * frames, ethernet frames, etc).
97 */
98static LIST_HEAD(, bpf_if)	bpf_iflist;
99static struct mtx	bpf_mtx;		/* bpf global lock */
100static int		bpf_bpfd_cnt;
101
102static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
103static void	bpf_detachd(struct bpf_d *);
104static void	bpf_freed(struct bpf_d *);
105static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
106		    struct sockaddr *, int *, struct bpf_insn *);
107static int	bpf_setif(struct bpf_d *, struct ifreq *);
108static void	bpf_timed_out(void *);
109static __inline void
110		bpf_wakeup(struct bpf_d *);
111static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
112		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
113		    struct timeval *);
114static void	reset_d(struct bpf_d *);
115static int	 bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
116static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
117static int	bpf_setdlt(struct bpf_d *, u_int);
118static void	filt_bpfdetach(struct knote *);
119static int	filt_bpfread(struct knote *, long);
120static void	bpf_drvinit(void *);
121static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
122
123SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
124int bpf_maxinsns = BPF_MAXINSNS;
125SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
126    &bpf_maxinsns, 0, "Maximum bpf program instructions");
127static int bpf_zerocopy_enable = 0;
128SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
129    &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
130SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
131    bpf_stats_sysctl, "bpf statistics portal");
132
133static	d_open_t	bpfopen;
134static	d_read_t	bpfread;
135static	d_write_t	bpfwrite;
136static	d_ioctl_t	bpfioctl;
137static	d_poll_t	bpfpoll;
138static	d_kqfilter_t	bpfkqfilter;
139
140static struct cdevsw bpf_cdevsw = {
141	.d_version =	D_VERSION,
142	.d_open =	bpfopen,
143	.d_read =	bpfread,
144	.d_write =	bpfwrite,
145	.d_ioctl =	bpfioctl,
146	.d_poll =	bpfpoll,
147	.d_name =	"bpf",
148	.d_kqfilter =	bpfkqfilter,
149};
150
151static struct filterops bpfread_filtops =
152	{ 1, NULL, filt_bpfdetach, filt_bpfread };
153
/*
 * Wrapper functions for various buffering methods.  If the set of buffer
 * modes expands, we will probably want to introduce a switch data structure
 * similar to protosw, etc.
 */
159static void
160bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
161    u_int len)
162{
163
164	BPFD_LOCK_ASSERT(d);
165
166	switch (d->bd_bufmode) {
167	case BPF_BUFMODE_BUFFER:
168		return (bpf_buffer_append_bytes(d, buf, offset, src, len));
169
170	case BPF_BUFMODE_ZBUF:
171		d->bd_zcopy++;
172		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));
173
174	default:
175		panic("bpf_buf_append_bytes");
176	}
177}
178
179static void
180bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
181    u_int len)
182{
183
184	BPFD_LOCK_ASSERT(d);
185
186	switch (d->bd_bufmode) {
187	case BPF_BUFMODE_BUFFER:
188		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
189
190	case BPF_BUFMODE_ZBUF:
191		d->bd_zcopy++;
192		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));
193
194	default:
195		panic("bpf_buf_append_mbuf");
196	}
197}
198
199/*
200 * This function gets called when the free buffer is re-assigned.
201 */
202static void
203bpf_buf_reclaimed(struct bpf_d *d)
204{
205
206	BPFD_LOCK_ASSERT(d);
207
208	switch (d->bd_bufmode) {
209	case BPF_BUFMODE_BUFFER:
210		return;
211
212	case BPF_BUFMODE_ZBUF:
213		bpf_zerocopy_buf_reclaimed(d);
214		return;
215
216	default:
217		panic("bpf_buf_reclaimed");
218	}
219}
220
221/*
222 * If the buffer mechanism has a way to decide that a held buffer can be made
223 * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
224 * returned if the buffer can be discarded, (0) is returned if it cannot.
225 */
226static int
227bpf_canfreebuf(struct bpf_d *d)
228{
229
230	BPFD_LOCK_ASSERT(d);
231
232	switch (d->bd_bufmode) {
233	case BPF_BUFMODE_ZBUF:
234		return (bpf_zerocopy_canfreebuf(d));
235	}
236	return (0);
237}
238
239/*
240 * Allow the buffer model to indicate that the current store buffer is
241 * immutable, regardless of the appearance of space.  Return (1) if the
242 * buffer is writable, and (0) if not.
243 */
244static int
245bpf_canwritebuf(struct bpf_d *d)
246{
247
248	BPFD_LOCK_ASSERT(d);
249
250	switch (d->bd_bufmode) {
251	case BPF_BUFMODE_ZBUF:
252		return (bpf_zerocopy_canwritebuf(d));
253	}
254	return (1);
255}
256
257/*
258 * Notify buffer model that an attempt to write to the store buffer has
259 * resulted in a dropped packet, in which case the buffer may be considered
260 * full.
261 */
262static void
263bpf_buffull(struct bpf_d *d)
264{
265
266	BPFD_LOCK_ASSERT(d);
267
268	switch (d->bd_bufmode) {
269	case BPF_BUFMODE_ZBUF:
270		bpf_zerocopy_buffull(d);
271		break;
272	}
273}
274
275/*
276 * Notify the buffer model that a buffer has moved into the hold position.
277 */
278void
279bpf_bufheld(struct bpf_d *d)
280{
281
282	BPFD_LOCK_ASSERT(d);
283
284	switch (d->bd_bufmode) {
285	case BPF_BUFMODE_ZBUF:
286		bpf_zerocopy_bufheld(d);
287		break;
288	}
289}
290
291static void
292bpf_free(struct bpf_d *d)
293{
294
295	switch (d->bd_bufmode) {
296	case BPF_BUFMODE_BUFFER:
297		return (bpf_buffer_free(d));
298
299	case BPF_BUFMODE_ZBUF:
300		return (bpf_zerocopy_free(d));
301
302	default:
303		panic("bpf_buf_free");
304	}
305}
306
307static int
308bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
309{
310
311	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
312		return (EOPNOTSUPP);
313	return (bpf_buffer_uiomove(d, buf, len, uio));
314}
315
316static int
317bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
318{
319
320	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
321		return (EOPNOTSUPP);
322	return (bpf_buffer_ioctl_sblen(d, i));
323}
324
325static int
326bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
327{
328
329	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
330		return (EOPNOTSUPP);
331	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
332}
333
334static int
335bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
336{
337
338	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
339		return (EOPNOTSUPP);
340	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
341}
342
343static int
344bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
345{
346
347	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
348		return (EOPNOTSUPP);
349	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
350}
351
352/*
353 * General BPF functions.
354 */
355static int
356bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
357    struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
358{
359	const struct ieee80211_bpf_params *p;
360	struct ether_header *eh;
361	struct mbuf *m;
362	int error;
363	int len;
364	int hlen;
365	int slen;
366
367	/*
368	 * Build a sockaddr based on the data link layer type.
369	 * We do this at this level because the ethernet header
370	 * is copied directly into the data field of the sockaddr.
371	 * In the case of SLIP, there is no header and the packet
372	 * is forwarded as is.
373	 * Also, we are careful to leave room at the front of the mbuf
374	 * for the link level header.
375	 */
376	switch (linktype) {
377
378	case DLT_SLIP:
379		sockp->sa_family = AF_INET;
380		hlen = 0;
381		break;
382
383	case DLT_EN10MB:
384		sockp->sa_family = AF_UNSPEC;
385		/* XXX Would MAXLINKHDR be better? */
386		hlen = ETHER_HDR_LEN;
387		break;
388
389	case DLT_FDDI:
390		sockp->sa_family = AF_IMPLINK;
391		hlen = 0;
392		break;
393
394	case DLT_RAW:
395		sockp->sa_family = AF_UNSPEC;
396		hlen = 0;
397		break;
398
399	case DLT_NULL:
400		/*
401		 * null interface types require a 4 byte pseudo header which
402		 * corresponds to the address family of the packet.
403		 */
404		sockp->sa_family = AF_UNSPEC;
405		hlen = 4;
406		break;
407
408	case DLT_ATM_RFC1483:
409		/*
410		 * en atm driver requires 4-byte atm pseudo header.
411		 * though it isn't standard, vpi:vci needs to be
412		 * specified anyway.
413		 */
414		sockp->sa_family = AF_UNSPEC;
415		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
416		break;
417
418	case DLT_PPP:
419		sockp->sa_family = AF_UNSPEC;
420		hlen = 4;	/* This should match PPP_HDRLEN */
421		break;
422
423	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
424		sockp->sa_family = AF_IEEE80211;
425		hlen = 0;
426		break;
427
428	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
429		sockp->sa_family = AF_IEEE80211;
430		sockp->sa_len = 12;	/* XXX != 0 */
431		hlen = sizeof(struct ieee80211_bpf_params);
432		break;
433
434	default:
435		return (EIO);
436	}
437
438	len = uio->uio_resid;
439
440	if (len - hlen > ifp->if_mtu)
441		return (EMSGSIZE);
442
443	if ((unsigned)len > MJUM16BYTES)
444		return (EIO);
445
446	if (len <= MHLEN)
447		MGETHDR(m, M_WAIT, MT_DATA);
448	else if (len <= MCLBYTES)
449		m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
450	else
451		m = m_getjcl(M_WAIT, MT_DATA, M_PKTHDR,
452#if (MJUMPAGESIZE > MCLBYTES)
453		    len <= MJUMPAGESIZE ? MJUMPAGESIZE :
454#endif
455		    (len <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES));
456	m->m_pkthdr.len = m->m_len = len;
457	m->m_pkthdr.rcvif = NULL;
458	*mp = m;
459
460	if (m->m_len < hlen) {
461		error = EPERM;
462		goto bad;
463	}
464
465	error = uiomove(mtod(m, u_char *), len, uio);
466	if (error)
467		goto bad;
468
469	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
470	if (slen == 0) {
471		error = EPERM;
472		goto bad;
473	}
474
475	/* Check for multicast destination */
476	switch (linktype) {
477	case DLT_EN10MB:
478		eh = mtod(m, struct ether_header *);
479		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
480			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
481			    ETHER_ADDR_LEN) == 0)
482				m->m_flags |= M_BCAST;
483			else
484				m->m_flags |= M_MCAST;
485		}
486		break;
487	}
488
489	/*
490	 * Make room for link header, and copy it to sockaddr
491	 */
492	if (hlen != 0) {
493		if (sockp->sa_family == AF_IEEE80211) {
494			/*
495			 * Collect true length from the parameter header
496			 * NB: sockp is known to be zero'd so if we do a
497			 *     short copy unspecified parameters will be
498			 *     zero.
499			 * NB: packet may not be aligned after stripping
500			 *     bpf params
501			 * XXX check ibp_vers
502			 */
503			p = mtod(m, const struct ieee80211_bpf_params *);
504			hlen = p->ibp_len;
505			if (hlen > sizeof(sockp->sa_data)) {
506				error = EINVAL;
507				goto bad;
508			}
509		}
510		bcopy(m->m_data, sockp->sa_data, hlen);
511	}
512	*hdrlen = hlen;
513
514	return (0);
515bad:
516	m_freem(m);
517	return (error);
518}
519
520/*
521 * Attach file to the bpf interface, i.e. make d listen on bp.
522 */
523static void
524bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
525{
526	/*
527	 * Point d at bp, and add d to the interface's list of listeners.
528	 * Finally, point the driver's bpf cookie at the interface so
529	 * it will divert packets to bpf.
530	 */
531	BPFIF_LOCK(bp);
532	d->bd_bif = bp;
533	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
534
535	bpf_bpfd_cnt++;
536	BPFIF_UNLOCK(bp);
537
538	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
539}
540
541/*
542 * Detach a file from its interface.
543 */
544static void
545bpf_detachd(struct bpf_d *d)
546{
547	int error;
548	struct bpf_if *bp;
549	struct ifnet *ifp;
550
551	bp = d->bd_bif;
552	BPFIF_LOCK(bp);
553	BPFD_LOCK(d);
554	ifp = d->bd_bif->bif_ifp;
555
556	/*
557	 * Remove d from the interface's descriptor list.
558	 */
559	LIST_REMOVE(d, bd_next);
560
561	bpf_bpfd_cnt--;
562	d->bd_bif = NULL;
563	BPFD_UNLOCK(d);
564	BPFIF_UNLOCK(bp);
565
566	EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
567
568	/*
569	 * Check if this descriptor had requested promiscuous mode.
570	 * If so, turn it off.
571	 */
572	if (d->bd_promisc) {
573		d->bd_promisc = 0;
574		CURVNET_SET(ifp->if_vnet);
575		error = ifpromisc(ifp, 0);
576		CURVNET_RESTORE();
577		if (error != 0 && error != ENXIO) {
578			/*
579			 * ENXIO can happen if a pccard is unplugged
580			 * Something is really wrong if we were able to put
581			 * the driver into promiscuous mode, but can't
582			 * take it out.
583			 */
584			if_printf(bp->bif_ifp,
585				"bpf_detach: ifpromisc failed (%d)\n", error);
586		}
587	}
588}
589
590/*
591 * Close the descriptor by detaching it from its interface,
592 * deallocating its buffers, and marking it free.
593 */
594static void
595bpf_dtor(void *data)
596{
597	struct bpf_d *d = data;
598
599	BPFD_LOCK(d);
600	if (d->bd_state == BPF_WAITING)
601		callout_stop(&d->bd_callout);
602	d->bd_state = BPF_IDLE;
603	BPFD_UNLOCK(d);
604	funsetown(&d->bd_sigio);
605	mtx_lock(&bpf_mtx);
606	if (d->bd_bif)
607		bpf_detachd(d);
608	mtx_unlock(&bpf_mtx);
609	selwakeuppri(&d->bd_sel, PRINET);
610#ifdef MAC
611	mac_bpfdesc_destroy(d);
612#endif /* MAC */
613	knlist_destroy(&d->bd_sel.si_note);
614	bpf_freed(d);
615	free(d, M_BPF);
616}
617
618/*
619 * Open ethernet device.  Returns ENXIO for illegal minor device number,
620 * EBUSY if file is open by another process.
621 */
622/* ARGSUSED */
623static	int
624bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
625{
626	struct bpf_d *d;
627	int error;
628
629	d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
630	error = devfs_set_cdevpriv(d, bpf_dtor);
631	if (error != 0) {
632		free(d, M_BPF);
633		return (error);
634	}
635
636	/*
637	 * For historical reasons, perform a one-time initialization call to
638	 * the buffer routines, even though we're not yet committed to a
639	 * particular buffer method.
640	 */
641	bpf_buffer_init(d);
642	d->bd_bufmode = BPF_BUFMODE_BUFFER;
643	d->bd_sig = SIGIO;
644	d->bd_direction = BPF_D_INOUT;
645	d->bd_pid = td->td_proc->p_pid;
646#ifdef MAC
647	mac_bpfdesc_init(d);
648	mac_bpfdesc_create(td->td_ucred, d);
649#endif
650	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
651	callout_init(&d->bd_callout, CALLOUT_MPSAFE);
652	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_mtx);
653
654	return (0);
655}
656
657/*
658 *  bpfread - read next chunk of packets from buffers
659 */
660static	int
661bpfread(struct cdev *dev, struct uio *uio, int ioflag)
662{
663	struct bpf_d *d;
664	int timed_out;
665	int error;
666
667	error = devfs_get_cdevpriv((void **)&d);
668	if (error != 0)
669		return (error);
670
671	/*
672	 * Restrict application to use a buffer the same size as
673	 * as kernel buffers.
674	 */
675	if (uio->uio_resid != d->bd_bufsize)
676		return (EINVAL);
677
678	BPFD_LOCK(d);
679	d->bd_pid = curthread->td_proc->p_pid;
680	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
681		BPFD_UNLOCK(d);
682		return (EOPNOTSUPP);
683	}
684	if (d->bd_state == BPF_WAITING)
685		callout_stop(&d->bd_callout);
686	timed_out = (d->bd_state == BPF_TIMED_OUT);
687	d->bd_state = BPF_IDLE;
688	/*
689	 * If the hold buffer is empty, then do a timed sleep, which
690	 * ends when the timeout expires or when enough packets
691	 * have arrived to fill the store buffer.
692	 */
693	while (d->bd_hbuf == NULL) {
694		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
695			/*
696			 * A packet(s) either arrived since the previous
697			 * read or arrived while we were asleep.
698			 * Rotate the buffers and return what's here.
699			 */
700			ROTATE_BUFFERS(d);
701			break;
702		}
703
704		/*
705		 * No data is available, check to see if the bpf device
706		 * is still pointed at a real interface.  If not, return
707		 * ENXIO so that the userland process knows to rebind
708		 * it before using it again.
709		 */
710		if (d->bd_bif == NULL) {
711			BPFD_UNLOCK(d);
712			return (ENXIO);
713		}
714
715		if (ioflag & O_NONBLOCK) {
716			BPFD_UNLOCK(d);
717			return (EWOULDBLOCK);
718		}
719		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
720		     "bpf", d->bd_rtout);
721		if (error == EINTR || error == ERESTART) {
722			BPFD_UNLOCK(d);
723			return (error);
724		}
725		if (error == EWOULDBLOCK) {
726			/*
727			 * On a timeout, return what's in the buffer,
728			 * which may be nothing.  If there is something
729			 * in the store buffer, we can rotate the buffers.
730			 */
731			if (d->bd_hbuf)
732				/*
733				 * We filled up the buffer in between
734				 * getting the timeout and arriving
735				 * here, so we don't need to rotate.
736				 */
737				break;
738
739			if (d->bd_slen == 0) {
740				BPFD_UNLOCK(d);
741				return (0);
742			}
743			ROTATE_BUFFERS(d);
744			break;
745		}
746	}
747	/*
748	 * At this point, we know we have something in the hold slot.
749	 */
750	BPFD_UNLOCK(d);
751
752	/*
753	 * Move data from hold buffer into user space.
754	 * We know the entire buffer is transferred since
755	 * we checked above that the read buffer is bpf_bufsize bytes.
756	 *
757	 * XXXRW: More synchronization needed here: what if a second thread
758	 * issues a read on the same fd at the same time?  Don't want this
759	 * getting invalidated.
760	 */
761	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
762
763	BPFD_LOCK(d);
764	d->bd_fbuf = d->bd_hbuf;
765	d->bd_hbuf = NULL;
766	d->bd_hlen = 0;
767	bpf_buf_reclaimed(d);
768	BPFD_UNLOCK(d);
769
770	return (error);
771}
772
773/*
774 * If there are processes sleeping on this descriptor, wake them up.
775 */
776static __inline void
777bpf_wakeup(struct bpf_d *d)
778{
779
780	BPFD_LOCK_ASSERT(d);
781	if (d->bd_state == BPF_WAITING) {
782		callout_stop(&d->bd_callout);
783		d->bd_state = BPF_IDLE;
784	}
785	wakeup(d);
786	if (d->bd_async && d->bd_sig && d->bd_sigio)
787		pgsigio(&d->bd_sigio, d->bd_sig, 0);
788
789	selwakeuppri(&d->bd_sel, PRINET);
790	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
791}
792
793static void
794bpf_timed_out(void *arg)
795{
796	struct bpf_d *d = (struct bpf_d *)arg;
797
798	BPFD_LOCK(d);
799	if (d->bd_state == BPF_WAITING) {
800		d->bd_state = BPF_TIMED_OUT;
801		if (d->bd_slen != 0)
802			bpf_wakeup(d);
803	}
804	BPFD_UNLOCK(d);
805}
806
807static int
808bpf_ready(struct bpf_d *d)
809{
810
811	BPFD_LOCK_ASSERT(d);
812
813	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
814		return (1);
815	if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
816	    d->bd_slen != 0)
817		return (1);
818	return (0);
819}
820
821static int
822bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
823{
824	struct bpf_d *d;
825	struct ifnet *ifp;
826	struct mbuf *m, *mc;
827	struct sockaddr dst;
828	int error, hlen;
829
830	error = devfs_get_cdevpriv((void **)&d);
831	if (error != 0)
832		return (error);
833
834	d->bd_pid = curthread->td_proc->p_pid;
835	d->bd_wcount++;
836	if (d->bd_bif == NULL) {
837		d->bd_wdcount++;
838		return (ENXIO);
839	}
840
841	ifp = d->bd_bif->bif_ifp;
842
843	if ((ifp->if_flags & IFF_UP) == 0) {
844		d->bd_wdcount++;
845		return (ENETDOWN);
846	}
847
848	if (uio->uio_resid == 0) {
849		d->bd_wdcount++;
850		return (0);
851	}
852
853	bzero(&dst, sizeof(dst));
854	m = NULL;
855	hlen = 0;
856	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
857	    &m, &dst, &hlen, d->bd_wfilter);
858	if (error) {
859		d->bd_wdcount++;
860		return (error);
861	}
862	d->bd_wfcount++;
863	if (d->bd_hdrcmplt)
864		dst.sa_family = pseudo_AF_HDRCMPLT;
865
866	if (d->bd_feedback) {
867		mc = m_dup(m, M_DONTWAIT);
868		if (mc != NULL)
869			mc->m_pkthdr.rcvif = ifp;
870		/* Set M_PROMISC for outgoing packets to be discarded. */
871		if (d->bd_direction == BPF_D_INOUT)
872			m->m_flags |= M_PROMISC;
873	} else
874		mc = NULL;
875
876	m->m_pkthdr.len -= hlen;
877	m->m_len -= hlen;
878	m->m_data += hlen;	/* XXX */
879
880	CURVNET_SET(ifp->if_vnet);
881#ifdef MAC
882	BPFD_LOCK(d);
883	mac_bpfdesc_create_mbuf(d, m);
884	if (mc != NULL)
885		mac_bpfdesc_create_mbuf(d, mc);
886	BPFD_UNLOCK(d);
887#endif
888
889	error = (*ifp->if_output)(ifp, m, &dst, NULL);
890	if (error)
891		d->bd_wdcount++;
892
893	if (mc != NULL) {
894		if (error == 0)
895			(*ifp->if_input)(ifp, mc);
896		else
897			m_freem(mc);
898	}
899	CURVNET_RESTORE();
900
901	return (error);
902}
903
904/*
905 * Reset a descriptor by flushing its packet buffer and clearing the receive
906 * and drop counts.  This is doable for kernel-only buffers, but with
907 * zero-copy buffers, we can't write to (or rotate) buffers that are
908 * currently owned by userspace.  It would be nice if we could encapsulate
909 * this logic in the buffer code rather than here.
910 */
911static void
912reset_d(struct bpf_d *d)
913{
914
915	mtx_assert(&d->bd_mtx, MA_OWNED);
916
917	if ((d->bd_hbuf != NULL) &&
918	    (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
919		/* Free the hold buffer. */
920		d->bd_fbuf = d->bd_hbuf;
921		d->bd_hbuf = NULL;
922		d->bd_hlen = 0;
923		bpf_buf_reclaimed(d);
924	}
925	if (bpf_canwritebuf(d))
926		d->bd_slen = 0;
927	d->bd_rcount = 0;
928	d->bd_dcount = 0;
929	d->bd_fcount = 0;
930	d->bd_wcount = 0;
931	d->bd_wfcount = 0;
932	d->bd_wdcount = 0;
933	d->bd_zcopy = 0;
934}
935
936/*
937 *  FIONREAD		Check for read packet available.
938 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
939 *  BIOCGBLEN		Get buffer len [for read()].
940 *  BIOCSETF		Set read filter.
941 *  BIOCSETFNR		Set read filter without resetting descriptor.
942 *  BIOCSETWF		Set write filter.
943 *  BIOCFLUSH		Flush read packet buffer.
944 *  BIOCPROMISC		Put interface into promiscuous mode.
945 *  BIOCGDLT		Get link layer type.
946 *  BIOCGETIF		Get interface name.
947 *  BIOCSETIF		Set interface.
948 *  BIOCSRTIMEOUT	Set read timeout.
949 *  BIOCGRTIMEOUT	Get read timeout.
950 *  BIOCGSTATS		Get packet stats.
951 *  BIOCIMMEDIATE	Set immediate mode.
952 *  BIOCVERSION		Get filter language version.
953 *  BIOCGHDRCMPLT	Get "header already complete" flag
954 *  BIOCSHDRCMPLT	Set "header already complete" flag
955 *  BIOCGDIRECTION	Get packet direction flag
956 *  BIOCSDIRECTION	Set packet direction flag
957 *  BIOCLOCK		Set "locked" flag
958 *  BIOCFEEDBACK	Set packet feedback mode.
959 *  BIOCSETZBUF		Set current zero-copy buffer locations.
960 *  BIOCGETZMAX		Get maximum zero-copy buffer size.
961 *  BIOCROTZBUF		Force rotation of zero-copy buffer
962 *  BIOCSETBUFMODE	Set buffer mode.
963 *  BIOCGETBUFMODE	Get current buffer mode.
964 */
965/* ARGSUSED */
966static	int
967bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
968    struct thread *td)
969{
970	struct bpf_d *d;
971	int error;
972
973	error = devfs_get_cdevpriv((void **)&d);
974	if (error != 0)
975		return (error);
976
977	/*
978	 * Refresh PID associated with this descriptor.
979	 */
980	BPFD_LOCK(d);
981	d->bd_pid = td->td_proc->p_pid;
982	if (d->bd_state == BPF_WAITING)
983		callout_stop(&d->bd_callout);
984	d->bd_state = BPF_IDLE;
985	BPFD_UNLOCK(d);
986
987	if (d->bd_locked == 1) {
988		switch (cmd) {
989		case BIOCGBLEN:
990		case BIOCFLUSH:
991		case BIOCGDLT:
992		case BIOCGDLTLIST:
993		case BIOCGETIF:
994		case BIOCGRTIMEOUT:
995		case BIOCGSTATS:
996		case BIOCVERSION:
997		case BIOCGRSIG:
998		case BIOCGHDRCMPLT:
999		case BIOCFEEDBACK:
1000		case FIONREAD:
1001		case BIOCLOCK:
1002		case BIOCSRTIMEOUT:
1003		case BIOCIMMEDIATE:
1004		case TIOCGPGRP:
1005		case BIOCROTZBUF:
1006			break;
1007		default:
1008			return (EPERM);
1009		}
1010	}
1011	CURVNET_SET(TD_TO_VNET(td));
1012	switch (cmd) {
1013
1014	default:
1015		error = EINVAL;
1016		break;
1017
1018	/*
1019	 * Check for read packet available.
1020	 */
1021	case FIONREAD:
1022		{
1023			int n;
1024
1025			BPFD_LOCK(d);
1026			n = d->bd_slen;
1027			if (d->bd_hbuf)
1028				n += d->bd_hlen;
1029			BPFD_UNLOCK(d);
1030
1031			*(int *)addr = n;
1032			break;
1033		}
1034
1035	case SIOCGIFADDR:
1036		{
1037			struct ifnet *ifp;
1038
1039			if (d->bd_bif == NULL)
1040				error = EINVAL;
1041			else {
1042				ifp = d->bd_bif->bif_ifp;
1043				error = (*ifp->if_ioctl)(ifp, cmd, addr);
1044			}
1045			break;
1046		}
1047
1048	/*
1049	 * Get buffer len [for read()].
1050	 */
1051	case BIOCGBLEN:
1052		*(u_int *)addr = d->bd_bufsize;
1053		break;
1054
1055	/*
1056	 * Set buffer length.
1057	 */
1058	case BIOCSBLEN:
1059		error = bpf_ioctl_sblen(d, (u_int *)addr);
1060		break;
1061
1062	/*
1063	 * Set link layer read filter.
1064	 */
1065	case BIOCSETF:
1066	case BIOCSETFNR:
1067	case BIOCSETWF:
1068		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
1069		break;
1070
1071	/*
1072	 * Flush read packet buffer.
1073	 */
1074	case BIOCFLUSH:
1075		BPFD_LOCK(d);
1076		reset_d(d);
1077		BPFD_UNLOCK(d);
1078		break;
1079
1080	/*
1081	 * Put interface into promiscuous mode.
1082	 */
1083	case BIOCPROMISC:
1084		if (d->bd_bif == NULL) {
1085			/*
1086			 * No interface attached yet.
1087			 */
1088			error = EINVAL;
1089			break;
1090		}
1091		if (d->bd_promisc == 0) {
1092			error = ifpromisc(d->bd_bif->bif_ifp, 1);
1093			if (error == 0)
1094				d->bd_promisc = 1;
1095		}
1096		break;
1097
1098	/*
1099	 * Get current data link type.
1100	 */
1101	case BIOCGDLT:
1102		if (d->bd_bif == NULL)
1103			error = EINVAL;
1104		else
1105			*(u_int *)addr = d->bd_bif->bif_dlt;
1106		break;
1107
1108	/*
1109	 * Get a list of supported data link types.
1110	 */
1111	case BIOCGDLTLIST:
1112		if (d->bd_bif == NULL)
1113			error = EINVAL;
1114		else
1115			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
1116		break;
1117
1118	/*
1119	 * Set data link type.
1120	 */
1121	case BIOCSDLT:
1122		if (d->bd_bif == NULL)
1123			error = EINVAL;
1124		else
1125			error = bpf_setdlt(d, *(u_int *)addr);
1126		break;
1127
1128	/*
1129	 * Get interface name.
1130	 */
1131	case BIOCGETIF:
1132		if (d->bd_bif == NULL)
1133			error = EINVAL;
1134		else {
1135			struct ifnet *const ifp = d->bd_bif->bif_ifp;
1136			struct ifreq *const ifr = (struct ifreq *)addr;
1137
1138			strlcpy(ifr->ifr_name, ifp->if_xname,
1139			    sizeof(ifr->ifr_name));
1140		}
1141		break;
1142
1143	/*
1144	 * Set interface.
1145	 */
1146	case BIOCSETIF:
1147		error = bpf_setif(d, (struct ifreq *)addr);
1148		break;
1149
1150	/*
1151	 * Set read timeout.
1152	 */
1153	case BIOCSRTIMEOUT:
1154		{
1155			struct timeval *tv = (struct timeval *)addr;
1156
1157			/*
1158			 * Subtract 1 tick from tvtohz() since this isn't
1159			 * a one-shot timer.
1160			 */
1161			if ((error = itimerfix(tv)) == 0)
1162				d->bd_rtout = tvtohz(tv) - 1;
1163			break;
1164		}
1165
1166	/*
1167	 * Get read timeout.
1168	 */
1169	case BIOCGRTIMEOUT:
1170		{
1171			struct timeval *tv = (struct timeval *)addr;
1172
1173			tv->tv_sec = d->bd_rtout / hz;
1174			tv->tv_usec = (d->bd_rtout % hz) * tick;
1175			break;
1176		}
1177
1178	/*
1179	 * Get packet stats.
1180	 */
1181	case BIOCGSTATS:
1182		{
1183			struct bpf_stat *bs = (struct bpf_stat *)addr;
1184
1185			/* XXXCSJP overflow */
1186			bs->bs_recv = d->bd_rcount;
1187			bs->bs_drop = d->bd_dcount;
1188			break;
1189		}
1190
1191	/*
1192	 * Set immediate mode.
1193	 */
1194	case BIOCIMMEDIATE:
1195		d->bd_immediate = *(u_int *)addr;
1196		break;
1197
1198	case BIOCVERSION:
1199		{
1200			struct bpf_version *bv = (struct bpf_version *)addr;
1201
1202			bv->bv_major = BPF_MAJOR_VERSION;
1203			bv->bv_minor = BPF_MINOR_VERSION;
1204			break;
1205		}
1206
1207	/*
1208	 * Get "header already complete" flag
1209	 */
1210	case BIOCGHDRCMPLT:
1211		*(u_int *)addr = d->bd_hdrcmplt;
1212		break;
1213
1214	/*
1215	 * Set "header already complete" flag
1216	 */
1217	case BIOCSHDRCMPLT:
1218		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
1219		break;
1220
1221	/*
1222	 * Get packet direction flag
1223	 */
1224	case BIOCGDIRECTION:
1225		*(u_int *)addr = d->bd_direction;
1226		break;
1227
1228	/*
1229	 * Set packet direction flag
1230	 */
1231	case BIOCSDIRECTION:
1232		{
1233			u_int	direction;
1234
1235			direction = *(u_int *)addr;
1236			switch (direction) {
1237			case BPF_D_IN:
1238			case BPF_D_INOUT:
1239			case BPF_D_OUT:
1240				d->bd_direction = direction;
1241				break;
1242			default:
1243				error = EINVAL;
1244			}
1245		}
1246		break;
1247
1248	case BIOCFEEDBACK:
1249		d->bd_feedback = *(u_int *)addr;
1250		break;
1251
1252	case BIOCLOCK:
1253		d->bd_locked = 1;
1254		break;
1255
1256	case FIONBIO:		/* Non-blocking I/O */
1257		break;
1258
1259	case FIOASYNC:		/* Send signal on receive packets */
1260		d->bd_async = *(int *)addr;
1261		break;
1262
1263	case FIOSETOWN:
1264		error = fsetown(*(int *)addr, &d->bd_sigio);
1265		break;
1266
1267	case FIOGETOWN:
1268		*(int *)addr = fgetown(&d->bd_sigio);
1269		break;
1270
1271	/* This is deprecated, FIOSETOWN should be used instead. */
1272	case TIOCSPGRP:
1273		error = fsetown(-(*(int *)addr), &d->bd_sigio);
1274		break;
1275
1276	/* This is deprecated, FIOGETOWN should be used instead. */
1277	case TIOCGPGRP:
1278		*(int *)addr = -fgetown(&d->bd_sigio);
1279		break;
1280
1281	case BIOCSRSIG:		/* Set receive signal */
1282		{
1283			u_int sig;
1284
1285			sig = *(u_int *)addr;
1286
1287			if (sig >= NSIG)
1288				error = EINVAL;
1289			else
1290				d->bd_sig = sig;
1291			break;
1292		}
1293	case BIOCGRSIG:
1294		*(u_int *)addr = d->bd_sig;
1295		break;
1296
1297	case BIOCGETBUFMODE:
1298		*(u_int *)addr = d->bd_bufmode;
1299		break;
1300
1301	case BIOCSETBUFMODE:
1302		/*
1303		 * Allow the buffering mode to be changed as long as we
1304		 * haven't yet committed to a particular mode.  Our
1305		 * definition of commitment, for now, is whether or not a
1306		 * buffer has been allocated or an interface attached, since
1307		 * that's the point where things get tricky.
1308		 */
1309		switch (*(u_int *)addr) {
1310		case BPF_BUFMODE_BUFFER:
1311			break;
1312
1313		case BPF_BUFMODE_ZBUF:
1314			if (bpf_zerocopy_enable)
1315				break;
1316			/* FALLSTHROUGH */
1317
1318		default:
1319			return (EINVAL);
1320		}
1321
1322		BPFD_LOCK(d);
1323		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
1324		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
1325			BPFD_UNLOCK(d);
1326			return (EBUSY);
1327		}
1328		d->bd_bufmode = *(u_int *)addr;
1329		BPFD_UNLOCK(d);
1330		break;
1331
1332	case BIOCGETZMAX:
1333		return (bpf_ioctl_getzmax(td, d, (size_t *)addr));
1334
1335	case BIOCSETZBUF:
1336		return (bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr));
1337
1338	case BIOCROTZBUF:
1339		return (bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr));
1340	}
1341	CURVNET_RESTORE();
1342	return (error);
1343}
1344
1345/*
1346 * Set d's packet filter program to fp.  If this file already has a filter,
1347 * free it and replace it.  Returns EINVAL for bogus requests.
1348 */
1349static int
1350bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
1351{
1352	struct bpf_insn *fcode, *old;
1353	u_int wfilter, flen, size;
1354#ifdef BPF_JITTER
1355	bpf_jit_filter *ofunc;
1356#endif
1357
1358	if (cmd == BIOCSETWF) {
1359		old = d->bd_wfilter;
1360		wfilter = 1;
1361#ifdef BPF_JITTER
1362		ofunc = NULL;
1363#endif
1364	} else {
1365		wfilter = 0;
1366		old = d->bd_rfilter;
1367#ifdef BPF_JITTER
1368		ofunc = d->bd_bfilter;
1369#endif
1370	}
1371	if (fp->bf_insns == NULL) {
1372		if (fp->bf_len != 0)
1373			return (EINVAL);
1374		BPFD_LOCK(d);
1375		if (wfilter)
1376			d->bd_wfilter = NULL;
1377		else {
1378			d->bd_rfilter = NULL;
1379#ifdef BPF_JITTER
1380			d->bd_bfilter = NULL;
1381#endif
1382			if (cmd == BIOCSETF)
1383				reset_d(d);
1384		}
1385		BPFD_UNLOCK(d);
1386		if (old != NULL)
1387			free((caddr_t)old, M_BPF);
1388#ifdef BPF_JITTER
1389		if (ofunc != NULL)
1390			bpf_destroy_jit_filter(ofunc);
1391#endif
1392		return (0);
1393	}
1394	flen = fp->bf_len;
1395	if (flen > bpf_maxinsns)
1396		return (EINVAL);
1397
1398	size = flen * sizeof(*fp->bf_insns);
1399	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
1400	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
1401	    bpf_validate(fcode, (int)flen)) {
1402		BPFD_LOCK(d);
1403		if (wfilter)
1404			d->bd_wfilter = fcode;
1405		else {
1406			d->bd_rfilter = fcode;
1407#ifdef BPF_JITTER
1408			d->bd_bfilter = bpf_jitter(fcode, flen);
1409#endif
1410			if (cmd == BIOCSETF)
1411				reset_d(d);
1412		}
1413		BPFD_UNLOCK(d);
1414		if (old != NULL)
1415			free((caddr_t)old, M_BPF);
1416#ifdef BPF_JITTER
1417		if (ofunc != NULL)
1418			bpf_destroy_jit_filter(ofunc);
1419#endif
1420
1421		return (0);
1422	}
1423	free((caddr_t)fcode, M_BPF);
1424	return (EINVAL);
1425}
1426
1427/*
1428 * Detach a file from its current interface (if attached at all) and attach
1429 * to the interface indicated by the name stored in ifr.
1430 * Return an errno or 0.
1431 */
1432static int
1433bpf_setif(struct bpf_d *d, struct ifreq *ifr)
1434{
1435	struct bpf_if *bp;
1436	struct ifnet *theywant;
1437
1438	theywant = ifunit(ifr->ifr_name);
1439	if (theywant == NULL || theywant->if_bpf == NULL)
1440		return (ENXIO);
1441
1442	bp = theywant->if_bpf;
1443
1444	/*
1445	 * Behavior here depends on the buffering model.  If we're using
1446	 * kernel memory buffers, then we can allocate them here.  If we're
1447	 * using zero-copy, then the user process must have registered
1448	 * buffers by the time we get here.  If not, return an error.
1449	 *
1450	 * XXXRW: There are locking issues here with multi-threaded use: what
1451	 * if two threads try to set the interface at once?
1452	 */
1453	switch (d->bd_bufmode) {
1454	case BPF_BUFMODE_BUFFER:
1455		if (d->bd_sbuf == NULL)
1456			bpf_buffer_alloc(d);
1457		KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
1458		break;
1459
1460	case BPF_BUFMODE_ZBUF:
1461		if (d->bd_sbuf == NULL)
1462			return (EINVAL);
1463		break;
1464
1465	default:
1466		panic("bpf_setif: bufmode %d", d->bd_bufmode);
1467	}
1468	if (bp != d->bd_bif) {
1469		if (d->bd_bif)
1470			/*
1471			 * Detach if attached to something else.
1472			 */
1473			bpf_detachd(d);
1474
1475		bpf_attachd(d, bp);
1476	}
1477	BPFD_LOCK(d);
1478	reset_d(d);
1479	BPFD_UNLOCK(d);
1480	return (0);
1481}
1482
1483/*
1484 * Support for select() and poll() system calls
1485 *
1486 * Return true iff the specific operation will not block indefinitely.
1487 * Otherwise, return false but make a note that a selwakeup() must be done.
1488 */
1489static int
1490bpfpoll(struct cdev *dev, int events, struct thread *td)
1491{
1492	struct bpf_d *d;
1493	int revents;
1494
1495	if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
1496		return (events &
1497		    (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));
1498
1499	/*
1500	 * Refresh PID associated with this descriptor.
1501	 */
1502	revents = events & (POLLOUT | POLLWRNORM);
1503	BPFD_LOCK(d);
1504	d->bd_pid = td->td_proc->p_pid;
1505	if (events & (POLLIN | POLLRDNORM)) {
1506		if (bpf_ready(d))
1507			revents |= events & (POLLIN | POLLRDNORM);
1508		else {
1509			selrecord(td, &d->bd_sel);
1510			/* Start the read timeout if necessary. */
1511			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1512				callout_reset(&d->bd_callout, d->bd_rtout,
1513				    bpf_timed_out, d);
1514				d->bd_state = BPF_WAITING;
1515			}
1516		}
1517	}
1518	BPFD_UNLOCK(d);
1519	return (revents);
1520}
1521
1522/*
1523 * Support for kevent() system call.  Register EVFILT_READ filters and
1524 * reject all others.
1525 */
1526int
1527bpfkqfilter(struct cdev *dev, struct knote *kn)
1528{
1529	struct bpf_d *d;
1530
1531	if (devfs_get_cdevpriv((void **)&d) != 0 ||
1532	    kn->kn_filter != EVFILT_READ)
1533		return (1);
1534
1535	/*
1536	 * Refresh PID associated with this descriptor.
1537	 */
1538	BPFD_LOCK(d);
1539	d->bd_pid = curthread->td_proc->p_pid;
1540	kn->kn_fop = &bpfread_filtops;
1541	kn->kn_hook = d;
1542	knlist_add(&d->bd_sel.si_note, kn, 1);
1543	BPFD_UNLOCK(d);
1544
1545	return (0);
1546}
1547
/*
 * Detach routine for bpf knotes: remove kn from the knote list of the
 * descriptor that bpfkqfilter() stored in kn->kn_hook.
 */
static void
filt_bpfdetach(struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	knlist_remove(&d->bd_sel.si_note, kn, 0);
}
1555
1556static int
1557filt_bpfread(struct knote *kn, long hint)
1558{
1559	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1560	int ready;
1561
1562	BPFD_LOCK_ASSERT(d);
1563	ready = bpf_ready(d);
1564	if (ready) {
1565		kn->kn_data = d->bd_slen;
1566		if (d->bd_hbuf)
1567			kn->kn_data += d->bd_hlen;
1568	}
1569	else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1570		callout_reset(&d->bd_callout, d->bd_rtout,
1571		    bpf_timed_out, d);
1572		d->bd_state = BPF_WAITING;
1573	}
1574
1575	return (ready);
1576}
1577
1578/*
1579 * Incoming linkage from device drivers.  Process the packet pkt, of length
1580 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
1581 * by each process' filter, and if accepted, stashed into the corresponding
1582 * buffer.
1583 */
1584void
1585bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
1586{
1587	struct bpf_d *d;
1588#ifdef BPF_JITTER
1589	bpf_jit_filter *bf;
1590#endif
1591	u_int slen;
1592	int gottime;
1593	struct timeval tv;
1594
1595	gottime = 0;
1596	BPFIF_LOCK(bp);
1597	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1598		BPFD_LOCK(d);
1599		++d->bd_rcount;
1600		/*
1601		 * NB: We dont call BPF_CHECK_DIRECTION() here since there is no
1602		 * way for the caller to indiciate to us whether this packet
1603		 * is inbound or outbound.  In the bpf_mtap() routines, we use
1604		 * the interface pointers on the mbuf to figure it out.
1605		 */
1606#ifdef BPF_JITTER
1607		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
1608		if (bf != NULL)
1609			slen = (*(bf->func))(pkt, pktlen, pktlen);
1610		else
1611#endif
1612		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
1613		if (slen != 0) {
1614			d->bd_fcount++;
1615			if (!gottime) {
1616				microtime(&tv);
1617				gottime = 1;
1618			}
1619#ifdef MAC
1620			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
1621#endif
1622				catchpacket(d, pkt, pktlen, slen,
1623				    bpf_append_bytes, &tv);
1624		}
1625		BPFD_UNLOCK(d);
1626	}
1627	BPFIF_UNLOCK(bp);
1628}
1629
/*
 * Evaluates to non-zero when descriptor d must skip a packet because of its
 * direction setting: either d is set to BPF_D_IN and r differs from i, or d
 * is set to BPF_D_OUT and r equals i.  The mtap routines below pass the
 * mbuf's receive interface as r and the tapped interface as i.  Note that
 * d is evaluated more than once.
 */
#define	BPF_CHECK_DIRECTION(d, r, i)				\
	    (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||	\
	    ((d)->bd_direction == BPF_D_OUT && (r) == (i)))
1633
1634/*
1635 * Incoming linkage from device drivers, when packet is in an mbuf chain.
1636 */
1637void
1638bpf_mtap(struct bpf_if *bp, struct mbuf *m)
1639{
1640	struct bpf_d *d;
1641#ifdef BPF_JITTER
1642	bpf_jit_filter *bf;
1643#endif
1644	u_int pktlen, slen;
1645	int gottime;
1646	struct timeval tv;
1647
1648	/* Skip outgoing duplicate packets. */
1649	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
1650		m->m_flags &= ~M_PROMISC;
1651		return;
1652	}
1653
1654	gottime = 0;
1655
1656	pktlen = m_length(m, NULL);
1657
1658	BPFIF_LOCK(bp);
1659	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1660		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
1661			continue;
1662		BPFD_LOCK(d);
1663		++d->bd_rcount;
1664#ifdef BPF_JITTER
1665		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
1666		/* XXX We cannot handle multiple mbufs. */
1667		if (bf != NULL && m->m_next == NULL)
1668			slen = (*(bf->func))(mtod(m, u_char *), pktlen, pktlen);
1669		else
1670#endif
1671		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
1672		if (slen != 0) {
1673			d->bd_fcount++;
1674			if (!gottime) {
1675				microtime(&tv);
1676				gottime = 1;
1677			}
1678#ifdef MAC
1679			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
1680#endif
1681				catchpacket(d, (u_char *)m, pktlen, slen,
1682				    bpf_append_mbuf, &tv);
1683		}
1684		BPFD_UNLOCK(d);
1685	}
1686	BPFIF_UNLOCK(bp);
1687}
1688
1689/*
1690 * Incoming linkage from device drivers, when packet is in
1691 * an mbuf chain and to be prepended by a contiguous header.
1692 */
1693void
1694bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
1695{
1696	struct mbuf mb;
1697	struct bpf_d *d;
1698	u_int pktlen, slen;
1699	int gottime;
1700	struct timeval tv;
1701
1702	/* Skip outgoing duplicate packets. */
1703	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
1704		m->m_flags &= ~M_PROMISC;
1705		return;
1706	}
1707
1708	gottime = 0;
1709
1710	pktlen = m_length(m, NULL);
1711	/*
1712	 * Craft on-stack mbuf suitable for passing to bpf_filter.
1713	 * Note that we cut corners here; we only setup what's
1714	 * absolutely needed--this mbuf should never go anywhere else.
1715	 */
1716	mb.m_next = m;
1717	mb.m_data = data;
1718	mb.m_len = dlen;
1719	pktlen += dlen;
1720
1721	BPFIF_LOCK(bp);
1722	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1723		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
1724			continue;
1725		BPFD_LOCK(d);
1726		++d->bd_rcount;
1727		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
1728		if (slen != 0) {
1729			d->bd_fcount++;
1730			if (!gottime) {
1731				microtime(&tv);
1732				gottime = 1;
1733			}
1734#ifdef MAC
1735			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
1736#endif
1737				catchpacket(d, (u_char *)&mb, pktlen, slen,
1738				    bpf_append_mbuf, &tv);
1739		}
1740		BPFD_UNLOCK(d);
1741	}
1742	BPFIF_UNLOCK(bp);
1743}
1744
1745#undef	BPF_CHECK_DIRECTION
1746
1747/*
1748 * Move the packet data from interface memory (pkt) into the
1749 * store buffer.  "cpfn" is the routine called to do the actual data
1750 * transfer.  bcopy is passed in to copy contiguous chunks, while
1751 * bpf_append_mbuf is passed in to copy mbuf chains.  In the latter case,
1752 * pkt is really an mbuf.
1753 */
1754static void
1755catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
1756    void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
1757    struct timeval *tv)
1758{
1759	struct bpf_hdr hdr;
1760	int totlen, curlen;
1761	int hdrlen = d->bd_bif->bif_hdrlen;
1762	int do_wakeup = 0;
1763
1764	BPFD_LOCK_ASSERT(d);
1765
1766	/*
1767	 * Detect whether user space has released a buffer back to us, and if
1768	 * so, move it from being a hold buffer to a free buffer.  This may
1769	 * not be the best place to do it (for example, we might only want to
1770	 * run this check if we need the space), but for now it's a reliable
1771	 * spot to do it.
1772	 */
1773	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
1774		d->bd_fbuf = d->bd_hbuf;
1775		d->bd_hbuf = NULL;
1776		d->bd_hlen = 0;
1777		bpf_buf_reclaimed(d);
1778	}
1779
1780	/*
1781	 * Figure out how many bytes to move.  If the packet is
1782	 * greater or equal to the snapshot length, transfer that
1783	 * much.  Otherwise, transfer the whole packet (unless
1784	 * we hit the buffer size limit).
1785	 */
1786	totlen = hdrlen + min(snaplen, pktlen);
1787	if (totlen > d->bd_bufsize)
1788		totlen = d->bd_bufsize;
1789
1790	/*
1791	 * Round up the end of the previous packet to the next longword.
1792	 *
1793	 * Drop the packet if there's no room and no hope of room
1794	 * If the packet would overflow the storage buffer or the storage
1795	 * buffer is considered immutable by the buffer model, try to rotate
1796	 * the buffer and wakeup pending processes.
1797	 */
1798	curlen = BPF_WORDALIGN(d->bd_slen);
1799	if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
1800		if (d->bd_fbuf == NULL) {
1801			/*
1802			 * There's no room in the store buffer, and no
1803			 * prospect of room, so drop the packet.  Notify the
1804			 * buffer model.
1805			 */
1806			bpf_buffull(d);
1807			++d->bd_dcount;
1808			return;
1809		}
1810		ROTATE_BUFFERS(d);
1811		do_wakeup = 1;
1812		curlen = 0;
1813	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
1814		/*
1815		 * Immediate mode is set, or the read timeout has already
1816		 * expired during a select call.  A packet arrived, so the
1817		 * reader should be woken up.
1818		 */
1819		do_wakeup = 1;
1820
1821	/*
1822	 * Append the bpf header.  Note we append the actual header size, but
1823	 * move forward the length of the header plus padding.
1824	 */
1825	bzero(&hdr, sizeof(hdr));
1826	hdr.bh_tstamp = *tv;
1827	hdr.bh_datalen = pktlen;
1828	hdr.bh_hdrlen = hdrlen;
1829	hdr.bh_caplen = totlen - hdrlen;
1830	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));
1831
1832	/*
1833	 * Copy the packet data into the store buffer and update its length.
1834	 */
1835	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen);
1836	d->bd_slen = curlen + totlen;
1837
1838	if (do_wakeup)
1839		bpf_wakeup(d);
1840}
1841
1842/*
1843 * Free buffers currently in use by a descriptor.
1844 * Called on close.
1845 */
1846static void
1847bpf_freed(struct bpf_d *d)
1848{
1849
1850	/*
1851	 * We don't need to lock out interrupts since this descriptor has
1852	 * been detached from its interface and it yet hasn't been marked
1853	 * free.
1854	 */
1855	bpf_free(d);
1856	if (d->bd_rfilter) {
1857		free((caddr_t)d->bd_rfilter, M_BPF);
1858#ifdef BPF_JITTER
1859		bpf_destroy_jit_filter(d->bd_bfilter);
1860#endif
1861	}
1862	if (d->bd_wfilter)
1863		free((caddr_t)d->bd_wfilter, M_BPF);
1864	mtx_destroy(&d->bd_mtx);
1865}
1866
/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 *
 * Convenience wrapper around bpfattach2() that stores the resulting
 * attachment in the interface's own if_bpf field.
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}
1877
1878/*
1879 * Attach an interface to bpf.  ifp is a pointer to the structure
1880 * defining the interface to be attached, dlt is the link layer type,
1881 * and hdrlen is the fixed size of the link header (variable length
1882 * headers are not yet supporrted).
1883 */
1884void
1885bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
1886{
1887	struct bpf_if *bp;
1888
1889	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
1890	if (bp == NULL)
1891		panic("bpfattach");
1892
1893	LIST_INIT(&bp->bif_dlist);
1894	bp->bif_ifp = ifp;
1895	bp->bif_dlt = dlt;
1896	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
1897	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
1898	*driverp = bp;
1899
1900	mtx_lock(&bpf_mtx);
1901	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
1902	mtx_unlock(&bpf_mtx);
1903
1904	/*
1905	 * Compute the length of the bpf header.  This is not necessarily
1906	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1907	 * that the network layer header begins on a longword boundary (for
1908	 * performance reasons and to alleviate alignment restrictions).
1909	 */
1910	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1911
1912	if (bootverbose)
1913		if_printf(ifp, "bpf attached\n");
1914}
1915
1916/*
1917 * Detach bpf from an interface.  This involves detaching each descriptor
1918 * associated with the interface, and leaving bd_bif NULL.  Notify each
1919 * descriptor as it's detached so that any sleepers wake up and get
1920 * ENXIO.
1921 */
1922void
1923bpfdetach(struct ifnet *ifp)
1924{
1925	struct bpf_if	*bp;
1926	struct bpf_d	*d;
1927
1928	/* Locate BPF interface information */
1929	mtx_lock(&bpf_mtx);
1930	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1931		if (ifp == bp->bif_ifp)
1932			break;
1933	}
1934
1935	/* Interface wasn't attached */
1936	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
1937		mtx_unlock(&bpf_mtx);
1938		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
1939		return;
1940	}
1941
1942	LIST_REMOVE(bp, bif_next);
1943	mtx_unlock(&bpf_mtx);
1944
1945	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
1946		bpf_detachd(d);
1947		BPFD_LOCK(d);
1948		bpf_wakeup(d);
1949		BPFD_UNLOCK(d);
1950	}
1951
1952	mtx_destroy(&bp->bif_mtx);
1953	free(bp, M_BPF);
1954}
1955
1956/*
1957 * Get a list of available data link type of the interface.
1958 */
1959static int
1960bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
1961{
1962	int n, error;
1963	struct ifnet *ifp;
1964	struct bpf_if *bp;
1965
1966	ifp = d->bd_bif->bif_ifp;
1967	n = 0;
1968	error = 0;
1969	mtx_lock(&bpf_mtx);
1970	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1971		if (bp->bif_ifp != ifp)
1972			continue;
1973		if (bfl->bfl_list != NULL) {
1974			if (n >= bfl->bfl_len) {
1975				mtx_unlock(&bpf_mtx);
1976				return (ENOMEM);
1977			}
1978			error = copyout(&bp->bif_dlt,
1979			    bfl->bfl_list + n, sizeof(u_int));
1980		}
1981		n++;
1982	}
1983	mtx_unlock(&bpf_mtx);
1984	bfl->bfl_len = n;
1985	return (error);
1986}
1987
1988/*
1989 * Set the data link type of a BPF instance.
1990 */
1991static int
1992bpf_setdlt(struct bpf_d *d, u_int dlt)
1993{
1994	int error, opromisc;
1995	struct ifnet *ifp;
1996	struct bpf_if *bp;
1997
1998	if (d->bd_bif->bif_dlt == dlt)
1999		return (0);
2000	ifp = d->bd_bif->bif_ifp;
2001	mtx_lock(&bpf_mtx);
2002	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2003		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
2004			break;
2005	}
2006	mtx_unlock(&bpf_mtx);
2007	if (bp != NULL) {
2008		opromisc = d->bd_promisc;
2009		bpf_detachd(d);
2010		bpf_attachd(d, bp);
2011		BPFD_LOCK(d);
2012		reset_d(d);
2013		BPFD_UNLOCK(d);
2014		if (opromisc) {
2015			error = ifpromisc(bp->bif_ifp, 1);
2016			if (error)
2017				if_printf(bp->bif_ifp,
2018					"bpf_setdlt: ifpromisc failed (%d)\n",
2019					error);
2020			else
2021				d->bd_promisc = 1;
2022		}
2023	}
2024	return (bp == NULL ? EINVAL : 0);
2025}
2026
2027static void
2028bpf_drvinit(void *unused)
2029{
2030	struct cdev *dev;
2031
2032	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
2033	LIST_INIT(&bpf_iflist);
2034
2035	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
2036	/* For compatibility */
2037	make_dev_alias(dev, "bpf0");
2038
2039}
2040
2041/*
2042 * Zero out the various packet counters associated with all of the bpf
2043 * descriptors.  At some point, we will probably want to get a bit more
2044 * granular and allow the user to specify descriptors to be zeroed.
2045 */
2046static void
2047bpf_zero_counters(void)
2048{
2049	struct bpf_if *bp;
2050	struct bpf_d *bd;
2051
2052	mtx_lock(&bpf_mtx);
2053	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2054		BPFIF_LOCK(bp);
2055		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
2056			BPFD_LOCK(bd);
2057			bd->bd_rcount = 0;
2058			bd->bd_dcount = 0;
2059			bd->bd_fcount = 0;
2060			bd->bd_wcount = 0;
2061			bd->bd_wfcount = 0;
2062			bd->bd_zcopy = 0;
2063			BPFD_UNLOCK(bd);
2064		}
2065		BPFIF_UNLOCK(bp);
2066	}
2067	mtx_unlock(&bpf_mtx);
2068}
2069
2070static void
2071bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
2072{
2073
2074	bzero(d, sizeof(*d));
2075	BPFD_LOCK_ASSERT(bd);
2076	d->bd_structsize = sizeof(*d);
2077	d->bd_immediate = bd->bd_immediate;
2078	d->bd_promisc = bd->bd_promisc;
2079	d->bd_hdrcmplt = bd->bd_hdrcmplt;
2080	d->bd_direction = bd->bd_direction;
2081	d->bd_feedback = bd->bd_feedback;
2082	d->bd_async = bd->bd_async;
2083	d->bd_rcount = bd->bd_rcount;
2084	d->bd_dcount = bd->bd_dcount;
2085	d->bd_fcount = bd->bd_fcount;
2086	d->bd_sig = bd->bd_sig;
2087	d->bd_slen = bd->bd_slen;
2088	d->bd_hlen = bd->bd_hlen;
2089	d->bd_bufsize = bd->bd_bufsize;
2090	d->bd_pid = bd->bd_pid;
2091	strlcpy(d->bd_ifname,
2092	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
2093	d->bd_locked = bd->bd_locked;
2094	d->bd_wcount = bd->bd_wcount;
2095	d->bd_wdcount = bd->bd_wdcount;
2096	d->bd_wfcount = bd->bd_wfcount;
2097	d->bd_zcopy = bd->bd_zcopy;
2098	d->bd_bufmode = bd->bd_bufmode;
2099}
2100
2101static int
2102bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
2103{
2104	struct xbpf_d *xbdbuf, *xbd, zerostats;
2105	int index, error;
2106	struct bpf_if *bp;
2107	struct bpf_d *bd;
2108
2109	/*
2110	 * XXX This is not technically correct. It is possible for non
2111	 * privileged users to open bpf devices. It would make sense
2112	 * if the users who opened the devices were able to retrieve
2113	 * the statistics for them, too.
2114	 */
2115	error = priv_check(req->td, PRIV_NET_BPF);
2116	if (error)
2117		return (error);
2118	/*
2119	 * Check to see if the user is requesting that the counters be
2120	 * zeroed out.  Explicitly check that the supplied data is zeroed,
2121	 * as we aren't allowing the user to set the counters currently.
2122	 */
2123	if (req->newptr != NULL) {
2124		if (req->newlen != sizeof(zerostats))
2125			return (EINVAL);
2126		bzero(&zerostats, sizeof(zerostats));
2127		xbd = req->newptr;
2128		if (bcmp(xbd, &zerostats, sizeof(*xbd)) != 0)
2129			return (EINVAL);
2130		bpf_zero_counters();
2131		return (0);
2132	}
2133	if (req->oldptr == NULL)
2134		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
2135	if (bpf_bpfd_cnt == 0)
2136		return (SYSCTL_OUT(req, 0, 0));
2137	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
2138	mtx_lock(&bpf_mtx);
2139	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
2140		mtx_unlock(&bpf_mtx);
2141		free(xbdbuf, M_BPF);
2142		return (ENOMEM);
2143	}
2144	index = 0;
2145	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2146		BPFIF_LOCK(bp);
2147		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
2148			xbd = &xbdbuf[index++];
2149			BPFD_LOCK(bd);
2150			bpfstats_fill_xbpf(xbd, bd);
2151			BPFD_UNLOCK(bd);
2152		}
2153		BPFIF_UNLOCK(bp);
2154	}
2155	mtx_unlock(&bpf_mtx);
2156	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
2157	free(xbdbuf, M_BPF);
2158	return (error);
2159}
2160
/*
 * Register bpf_drvinit() (called with a NULL argument) as the "bpfdev"
 * initialisation hook at SI_SUB_DRIVERS, SI_ORDER_MIDDLE.
 */
SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL);
2162
2163#else /* !DEV_BPF && !NETGRAPH_BPF */
/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */
/* Placeholder attachment that the stub bpfattach2() hands to every driver. */
static struct bpf_if bp_null;
2171
/* Stub: bpf is not compiled in, so a tapped contiguous packet is ignored. */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
}
2176
/* Stub: bpf is not compiled in, so a tapped mbuf chain is ignored. */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}
2181
/*
 * Stub: bpf is not compiled in, so a tapped header-plus-mbuf packet is
 * ignored.
 */
void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
}
2186
/*
 * Stub attach: defers to the stub bpfattach2(), storing its result in the
 * interface's own if_bpf field.
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}
2193
/*
 * Stub attach: ifp, dlt and hdrlen are ignored; the caller's pointer is
 * simply set to the shared placeholder bp_null so that it is non-NULL.
 */
void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{

	*driverp = &bp_null;
}
2200
/* Stub detach: nothing was allocated by the stub attach, so nothing to do. */
void
bpfdetach(struct ifnet *ifp)
{
}
2205
2206u_int
2207bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
2208{
2209	return -1;	/* "no filter" behaviour */
2210}
2211
/* Stub validator: no program is ever accepted, so this always returns 0. */
int
bpf_validate(const struct bpf_insn *f, int len)
{

	return (0);
}
2217
2218#endif /* !DEV_BPF && !NETGRAPH_BPF */
2219