/*-
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/net/bpf.c 207278 2010-04-27 15:16:54Z bz $");

#include "opt_bpf.h"
#include "opt_compat.h"
#include "opt_netgraph.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/uio.h>

#include <sys/event.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/proc.h>

#include <sys/socket.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
#include <net/bpf_jitter.h>
#endif
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <net80211/ieee80211_freebsd.h>

#include <security/mac/mac_framework.h>

MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define PRINET  26			/* interruptible */

#ifdef COMPAT_FREEBSD32
#include <sys/mount.h>
#include <compat/freebsd32/freebsd32.h>
#define BPF_ALIGNMENT32 sizeof(int32_t)
#define BPF_WORDALIGN32(x) (((x)+(BPF_ALIGNMENT32-1))&~(BPF_ALIGNMENT32-1))

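/*
 * Worked example (illustrative only): BPF_WORDALIGN32 rounds a length up
 * to the next 32-bit boundary, so BPF_WORDALIGN32(5) == 8 while
 * BPF_WORDALIGN32(8) == 8 already.
 */
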
/*
 * 32-bit version of structure prepended to each packet.  We use this header
 * instead of the standard one for 32-bit streams.  We mark a stream as
 * 32-bit the first time we see a 32-bit compat ioctl request.
 */
struct bpf_hdr32 {
	struct timeval32 bh_tstamp;	/* time stamp */
	uint32_t	bh_caplen;	/* length of captured portion */
	uint32_t	bh_datalen;	/* original length of packet */
	uint16_t	bh_hdrlen;	/* length of bpf header (this struct
					   plus alignment padding) */
};

struct bpf_program32 {
	u_int bf_len;
	uint32_t bf_insns;
};

struct bpf_dltlist32 {
	u_int	bfl_len;
	u_int	bfl_list;
};

#define	BIOCSETF32	_IOW('B', 103, struct bpf_program32)
#define	BIOCSRTIMEOUT32	_IOW('B', 109, struct timeval32)
#define	BIOCGRTIMEOUT32	_IOR('B', 110, struct timeval32)
#define	BIOCGDLTLIST32	_IOWR('B', 121, struct bpf_dltlist32)
#define	BIOCSETWF32	_IOW('B', 123, struct bpf_program32)
#define	BIOCSETFNR32	_IOW('B', 130, struct bpf_program32)
#endif

/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (e.g., 802.11
 * frames, Ethernet frames, etc.).
 */
static LIST_HEAD(, bpf_if)	bpf_iflist;
static struct mtx	bpf_mtx;		/* bpf global lock */
static int		bpf_bpfd_cnt;

static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static void	bpf_freed(struct bpf_d *);
static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
		    struct sockaddr *, int *, struct bpf_insn *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
		    struct timeval *);
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static void	bpf_drvinit(void *);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
static int bpf_zerocopy_enable = 0;
SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
    &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");

static	d_open_t	bpfopen;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;
static	d_kqfilter_t	bpfkqfilter;

static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_open =	bpfopen,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};

static struct filterops bpfread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfread,
};

/*
 * Wrapper functions for various buffering methods.  If the set of buffer
 * modes expands, we will probably want to introduce a switch data structure
 * similar to protosw, etc.
 */
static void
bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_bytes(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		d->bd_zcopy++;
		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_bytes");
	}
}

static void
bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		d->bd_zcopy++;
		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_mbuf");
	}
}

/*
 * This function gets called when the free buffer is re-assigned.
 */
static void
bpf_buf_reclaimed(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return;

	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_buf_reclaimed(d);
		return;

	default:
		panic("bpf_buf_reclaimed");
	}
}

/*
 * If the buffer mechanism has a way to decide that a held buffer can be made
 * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
 * returned if the buffer can be discarded, (0) is returned if it cannot.
 */
static int
bpf_canfreebuf(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canfreebuf(d));
	}
	return (0);
}

/*
 * Allow the buffer model to indicate that the current store buffer is
 * immutable, regardless of the appearance of space.  Return (1) if the
 * buffer is writable, and (0) if not.
 */
static int
bpf_canwritebuf(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canwritebuf(d));
	}
	return (1);
}

/*
 * Notify buffer model that an attempt to write to the store buffer has
 * resulted in a dropped packet, in which case the buffer may be considered
 * full.
 */
static void
bpf_buffull(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_buffull(d);
		break;
	}
}

/*
 * Notify the buffer model that a buffer has moved into the hold position.
 */
void
bpf_bufheld(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_bufheld(d);
		break;
	}
}

static void
bpf_free(struct bpf_d *d)
{

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_free(d));

	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_free(d));

	default:
		panic("bpf_buf_free");
	}
}

static int
bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_uiomove(d, buf, len, uio));
}

static int
bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_ioctl_sblen(d, i));
}

static int
bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
}

static int
bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
}

static int
bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
}

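/*
 * Illustrative userland sketch of the zero-copy setup these wrappers back.
 * This is a sketch only: it assumes the net.bpf.zerocopy_enable sysctl is
 * set, that fd is an open bpf descriptor, and that bufa/bufb are
 * page-aligned buffers the caller has already mapped; field names follow
 * struct bpf_zbuf in <net/bpf.h>.
 *
 *	u_int mode = BPF_BUFMODE_ZBUF;
 *	size_t zmax;
 *	struct bpf_zbuf zb;
 *
 *	ioctl(fd, BIOCSETBUFMODE, &mode);
 *	ioctl(fd, BIOCGETZMAX, &zmax);		// largest legal bz_buflen
 *	zb.bz_bufa = bufa;
 *	zb.bz_bufb = bufb;
 *	zb.bz_buflen = buflen;			// must be <= zmax
 *	ioctl(fd, BIOCSETZBUF, &zb);		// register before BIOCSETIF
 *	ioctl(fd, BIOCROTZBUF, &zb);		// force out partial data
 */
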
/*
 * General BPF functions.
 */
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
    struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
{
	const struct ieee80211_bpf_params *p;
	struct ether_header *eh;
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_NULL:
		/*
		 * null interface types require a 4 byte pseudo header which
		 * corresponds to the address family of the packet.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * The "en" ATM driver requires a 4-byte ATM pseudo header.
		 * Though it isn't standard, the VPI:VCI needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
		sockp->sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
		sockp->sa_family = AF_IEEE80211;
		sockp->sa_len = 12;	/* XXX != 0 */
		hlen = sizeof(struct ieee80211_bpf_params);
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;

	if (len - hlen > ifp->if_mtu)
		return (EMSGSIZE);

	if ((unsigned)len > MJUM16BYTES)
		return (EIO);

	if (len <= MHLEN)
		MGETHDR(m, M_WAIT, MT_DATA);
	else if (len <= MCLBYTES)
		m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
	else
		m = m_getjcl(M_WAIT, MT_DATA, M_PKTHDR,
#if (MJUMPAGESIZE > MCLBYTES)
		    len <= MJUMPAGESIZE ? MJUMPAGESIZE :
#endif
		    (len <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES));
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}

	error = uiomove(mtod(m, u_char *), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
	if (slen == 0) {
		error = EPERM;
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB:
		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
			    ETHER_ADDR_LEN) == 0)
				m->m_flags |= M_BCAST;
			else
				m->m_flags |= M_MCAST;
		}
		break;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (sockp->sa_family == AF_IEEE80211) {
			/*
			 * Collect true length from the parameter header
			 * NB: sockp is known to be zero'd so if we do a
			 *     short copy unspecified parameters will be
			 *     zero.
			 * NB: packet may not be aligned after stripping
			 *     bpf params
			 * XXX check ibp_vers
			 */
			p = mtod(m, const struct ieee80211_bpf_params *);
			hlen = p->ibp_len;
			if (hlen > sizeof(sockp->sa_data)) {
				error = EINVAL;
				goto bad;
			}
		}
		bcopy(m->m_data, sockp->sa_data, hlen);
	}
	*hdrlen = hlen;

	return (0);
bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	BPFIF_LOCK(bp);
	d->bd_bif = bp;
	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	bpf_bpfd_cnt++;
	BPFIF_UNLOCK(bp);

	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	int error;
	struct bpf_if *bp;
	struct ifnet *ifp;

	bp = d->bd_bif;
	BPFIF_LOCK(bp);
	BPFD_LOCK(d);
	ifp = d->bd_bif->bif_ifp;

	/*
	 * Remove d from the interface's descriptor list.
	 */
	LIST_REMOVE(d, bd_next);

	bpf_bpfd_cnt--;
	d->bd_bif = NULL;
	BPFD_UNLOCK(d);
	BPFIF_UNLOCK(bp);

	EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		CURVNET_SET(ifp->if_vnet);
		error = ifpromisc(ifp, 0);
		CURVNET_RESTORE();
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged.
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
				"bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
static void
bpf_dtor(void *data)
{
	struct bpf_d *d = data;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	mtx_lock(&bpf_mtx);
	if (d->bd_bif)
		bpf_detachd(d);
	mtx_unlock(&bpf_mtx);
	selwakeuppri(&d->bd_sel, PRINET);
#ifdef MAC
	mac_bpfdesc_destroy(d);
#endif /* MAC */
	knlist_destroy(&d->bd_sel.si_note);
	callout_drain(&d->bd_callout);
	bpf_freed(d);
	free(d, M_BPF);
}

/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
static	int
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d;
	int error;

	d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	error = devfs_set_cdevpriv(d, bpf_dtor);
	if (error != 0) {
		free(d, M_BPF);
		return (error);
	}

	/*
	 * For historical reasons, perform a one-time initialization call to
	 * the buffer routines, even though we're not yet committed to a
	 * particular buffer method.
	 */
	bpf_buffer_init(d);
	d->bd_bufmode = BPF_BUFMODE_BUFFER;
	d->bd_sig = SIGIO;
	d->bd_direction = BPF_D_INOUT;
	d->bd_pid = td->td_proc->p_pid;
#ifdef MAC
	mac_bpfdesc_init(d);
	mac_bpfdesc_create(td->td_ucred, d);
#endif
	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init_mtx(&d->bd_callout, &d->bd_mtx, 0);
	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_mtx);

	return (0);
}
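
/*
 * Illustrative userland sketch (not part of this file): a minimal consumer
 * opens the clone device, binds it to an interface with BIOCSETIF, and
 * learns the buffer size it must use for read(2).  The interface name
 * "em0" is an assumption for the example.
 *
 *	struct ifreq ifr;
 *	u_int blen;
 *	int fd = open("/dev/bpf", O_RDONLY);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCGBLEN, &blen);	// read() must pass exactly blen
 */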

/*
 *  bpfread - read next chunk of packets from buffers
 */
static	int
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	int error;
	int non_block;
	int timed_out;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	/*
	 * Restrict the application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	non_block = ((ioflag & O_NONBLOCK) != 0);

	BPFD_LOCK(d);
	d->bd_pid = curthread->td_proc->p_pid;
	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
		BPFD_UNLOCK(d);
		return (EOPNOTSUPP);
	}
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read or while we were asleep.
			 */
			if (d->bd_immediate || non_block || timed_out) {
				/*
				 * Rotate the buffers and return what's here
				 * if we are in immediate mode, non-blocking
				 * flag is set, or this descriptor timed out.
				 */
				ROTATE_BUFFERS(d);
				break;
			}
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (non_block) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
		     "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 *
	 * XXXRW: More synchronization needed here: what if a second thread
	 * issues a read on the same fd at the same time?  Don't want this
	 * getting invalidated.
	 */
	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	bpf_buf_reclaimed(d);
	BPFD_UNLOCK(d);

	return (error);
}
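
/*
 * Illustrative userland sketch of consuming one read(2) worth of data
 * (hedged: fd, buf, and blen as set up in the open example above).  Each
 * captured packet is prefixed by a struct bpf_hdr, and records are padded
 * so the next header begins on a word boundary:
 *
 *	ssize_t cc = read(fd, buf, blen);
 *	u_char *p = buf;
 *
 *	while (p < buf + cc) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)p;
 *		u_char *pkt = p + bh->bh_hdrlen;
 *
 *		// bh->bh_caplen bytes of packet data start at pkt
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */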

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup(d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeuppri(&d->bd_sel, PRINET);
	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
}

static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK_ASSERT(d);

	if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout))
		return;
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
}

static int
bpf_ready(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
		return (1);
	if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
	    d->bd_slen != 0)
		return (1);
	return (0);
}

static int
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m, *mc;
	struct sockaddr dst;
	int error, hlen;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	d->bd_pid = curthread->td_proc->p_pid;
	d->bd_wcount++;
	if (d->bd_bif == NULL) {
		d->bd_wdcount++;
		return (ENXIO);
	}

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0) {
		d->bd_wdcount++;
		return (ENETDOWN);
	}

	if (uio->uio_resid == 0) {
		d->bd_wdcount++;
		return (0);
	}

	bzero(&dst, sizeof(dst));
	m = NULL;
	hlen = 0;
	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
	    &m, &dst, &hlen, d->bd_wfilter);
	if (error) {
		d->bd_wdcount++;
		return (error);
	}
	d->bd_wfcount++;
	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

	if (d->bd_feedback) {
		mc = m_dup(m, M_DONTWAIT);
		if (mc != NULL)
			mc->m_pkthdr.rcvif = ifp;
		/* Set M_PROMISC for outgoing packets to be discarded. */
		if (d->bd_direction == BPF_D_INOUT)
			m->m_flags |= M_PROMISC;
	} else
		mc = NULL;

	m->m_pkthdr.len -= hlen;
	m->m_len -= hlen;
	m->m_data += hlen;	/* XXX */

	CURVNET_SET(ifp->if_vnet);
#ifdef MAC
	BPFD_LOCK(d);
	mac_bpfdesc_create_mbuf(d, m);
	if (mc != NULL)
		mac_bpfdesc_create_mbuf(d, mc);
	BPFD_UNLOCK(d);
#endif

	error = (*ifp->if_output)(ifp, m, &dst, NULL);
	if (error)
		d->bd_wdcount++;

	if (mc != NULL) {
		if (error == 0)
			(*ifp->if_input)(ifp, mc);
		else
			m_freem(mc);
	}
	CURVNET_RESTORE();

	return (error);
}
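
/*
 * Illustrative userland sketch (a sketch only; "frame" stands for a fully
 * formed Ethernet frame the caller built): each write(2) injects one
 * packet.  With the "header complete" flag set, the supplied link-layer
 * source address is used as-is:
 *
 *	u_int one = 1;
 *
 *	ioctl(fd, BIOCSHDRCMPLT, &one);
 *	write(fd, frame, framelen);	// payload must fit within if_mtu
 */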

/*
 * Reset a descriptor by flushing its packet buffer and clearing the receive
 * and drop counts.  This is doable for kernel-only buffers, but with
 * zero-copy buffers, we can't write to (or rotate) buffers that are
 * currently owned by userspace.  It would be nice if we could encapsulate
 * this logic in the buffer code rather than here.
 */
static void
reset_d(struct bpf_d *d)
{

	mtx_assert(&d->bd_mtx, MA_OWNED);

	if ((d->bd_hbuf != NULL) &&
	    (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
		bpf_buf_reclaimed(d);
	}
	if (bpf_canwritebuf(d))
		d->bd_slen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_fcount = 0;
	d->bd_wcount = 0;
	d->bd_wfcount = 0;
	d->bd_wdcount = 0;
	d->bd_zcopy = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set read filter.
 *  BIOCSETFNR		Set read filter without resetting descriptor.
 *  BIOCSETWF		Set write filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag.
 *  BIOCSHDRCMPLT	Set "header already complete" flag.
 *  BIOCGDIRECTION	Get packet direction flag.
 *  BIOCSDIRECTION	Set packet direction flag.
 *  BIOCLOCK		Set "locked" flag.
 *  BIOCFEEDBACK	Set packet feedback mode.
 *  BIOCSETZBUF		Set current zero-copy buffer locations.
 *  BIOCGETZMAX		Get maximum zero-copy buffer size.
 *  BIOCROTZBUF		Force rotation of zero-copy buffer.
 *  BIOCSETBUFMODE	Set buffer mode.
 *  BIOCGETBUFMODE	Get current buffer mode.
 */
/* ARGSUSED */
static	int
bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
    struct thread *td)
{
	struct bpf_d *d;
	int error;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	if (d->bd_locked == 1) {
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
#ifdef COMPAT_FREEBSD32
		case BIOCGDLTLIST32:
#endif
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
#ifdef COMPAT_FREEBSD32
		case BIOCGRTIMEOUT32:
#endif
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case BIOCFEEDBACK:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
#ifdef COMPAT_FREEBSD32
		case BIOCSRTIMEOUT32:
#endif
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCROTZBUF:
			break;
		default:
			return (EPERM);
		}
	}
#ifdef COMPAT_FREEBSD32
	/*
	 * If we see a 32-bit compat ioctl, mark the stream as 32-bit so
	 * that it will get 32-bit packet headers.
	 */
	switch (cmd) {
	case BIOCSETF32:
	case BIOCSETFNR32:
	case BIOCSETWF32:
	case BIOCGDLTLIST32:
	case BIOCGRTIMEOUT32:
	case BIOCSRTIMEOUT32:
		d->bd_compat32 = 1;
	}
#endif

	CURVNET_SET(TD_TO_VNET(td));
	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	case SIOCGIFADDR:
		{
			struct ifnet *ifp;

			if (d->bd_bif == NULL)
				error = EINVAL;
			else {
				ifp = d->bd_bif->bif_ifp;
				error = (*ifp->if_ioctl)(ifp, cmd, addr);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		error = bpf_ioctl_sblen(d, (u_int *)addr);
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
	case BIOCSETFNR:
	case BIOCSETWF:
#ifdef COMPAT_FREEBSD32
	case BIOCSETF32:
	case BIOCSETFNR32:
	case BIOCSETWF32:
#endif
		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;

	/*
	 * Get current data link type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported data link types.
	 */
#ifdef COMPAT_FREEBSD32
	case BIOCGDLTLIST32:
		{
			struct bpf_dltlist32 *list32;
			struct bpf_dltlist dltlist;

			list32 = (struct bpf_dltlist32 *)addr;
			dltlist.bfl_len = list32->bfl_len;
			dltlist.bfl_list = PTRIN(list32->bfl_list);
			if (d->bd_bif == NULL)
				error = EINVAL;
			else {
				error = bpf_getdltlist(d, &dltlist);
				if (error == 0)
					list32->bfl_len = dltlist.bfl_len;
			}
			break;
		}
#endif

	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			strlcpy(ifr->ifr_name, ifp->if_xname,
			    sizeof(ifr->ifr_name));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
#ifdef COMPAT_FREEBSD32
	case BIOCSRTIMEOUT32:
#endif
		{
			struct timeval *tv = (struct timeval *)addr;
#ifdef COMPAT_FREEBSD32
			struct timeval32 *tv32;
			struct timeval tv64;

			if (cmd == BIOCSRTIMEOUT32) {
				tv32 = (struct timeval32 *)addr;
				tv = &tv64;
				tv->tv_sec = tv32->tv_sec;
				tv->tv_usec = tv32->tv_usec;
			} else
#endif
				tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
#ifdef COMPAT_FREEBSD32
	case BIOCGRTIMEOUT32:
#endif
		{
			struct timeval *tv;
#ifdef COMPAT_FREEBSD32
			struct timeval32 *tv32;
			struct timeval tv64;

			if (cmd == BIOCGRTIMEOUT32)
				tv = &tv64;
			else
#endif
				tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
#ifdef COMPAT_FREEBSD32
			if (cmd == BIOCGRTIMEOUT32) {
				tv32 = (struct timeval32 *)addr;
				tv32->tv_sec = tv->tv_sec;
				tv32->tv_usec = tv->tv_usec;
			}
#endif

			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			/* XXXCSJP overflow */
			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag.
	 */
	case BIOCGHDRCMPLT:
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	/*
	 * Set "header already complete" flag.
	 */
	case BIOCSHDRCMPLT:
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get packet direction flag.
	 */
	case BIOCGDIRECTION:
		*(u_int *)addr = d->bd_direction;
		break;

	/*
	 * Set packet direction flag.
	 */
	case BIOCSDIRECTION:
		{
			u_int	direction;

			direction = *(u_int *)addr;
			switch (direction) {
			case BPF_D_IN:
			case BPF_D_INOUT:
			case BPF_D_OUT:
				d->bd_direction = direction;
				break;
			default:
				error = EINVAL;
			}
		}
		break;

	case BIOCFEEDBACK:
		d->bd_feedback = *(u_int *)addr;
		break;

	case BIOCLOCK:
		d->bd_locked = 1;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(&d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(&d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;

	case BIOCGETBUFMODE:
		*(u_int *)addr = d->bd_bufmode;
		break;

	case BIOCSETBUFMODE:
		/*
		 * Allow the buffering mode to be changed as long as we
		 * haven't yet committed to a particular mode.  Our
		 * definition of commitment, for now, is whether or not a
		 * buffer has been allocated or an interface attached, since
		 * that's the point where things get tricky.
		 */
		switch (*(u_int *)addr) {
		case BPF_BUFMODE_BUFFER:
			break;

		case BPF_BUFMODE_ZBUF:
			if (bpf_zerocopy_enable)
				break;
			/* FALLTHROUGH */

		default:
			CURVNET_RESTORE();
			return (EINVAL);
		}

		BPFD_LOCK(d);
		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
			BPFD_UNLOCK(d);
			CURVNET_RESTORE();
			return (EBUSY);
		}
		d->bd_bufmode = *(u_int *)addr;
		BPFD_UNLOCK(d);
		break;

	case BIOCGETZMAX:
		error = bpf_ioctl_getzmax(td, d, (size_t *)addr);
		break;

	case BIOCSETZBUF:
		error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr);
		break;

	case BIOCROTZBUF:
		error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr);
		break;
	}
	CURVNET_RESTORE();
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
	struct bpf_insn *fcode, *old;
	u_int wfilter, flen, size;
#ifdef BPF_JITTER
	bpf_jit_filter *ofunc;
#endif
#ifdef COMPAT_FREEBSD32
	struct bpf_program32 *fp32;
	struct bpf_program fp_swab;

	if (cmd == BIOCSETWF32 || cmd == BIOCSETF32 || cmd == BIOCSETFNR32) {
		fp32 = (struct bpf_program32 *)fp;
		fp_swab.bf_len = fp32->bf_len;
		fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
		fp = &fp_swab;
		if (cmd == BIOCSETWF32)
			cmd = BIOCSETWF;
	}
#endif
	if (cmd == BIOCSETWF) {
		old = d->bd_wfilter;
		wfilter = 1;
#ifdef BPF_JITTER
		ofunc = NULL;
#endif
	} else {
		wfilter = 0;
		old = d->bd_rfilter;
#ifdef BPF_JITTER
		ofunc = d->bd_bfilter;
#endif
	}
	if (fp->bf_insns == NULL) {
		if (fp->bf_len != 0)
			return (EINVAL);
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = NULL;
		else {
			d->bd_rfilter = NULL;
#ifdef BPF_JITTER
			d->bd_bfilter = NULL;
#endif
			if (cmd == BIOCSETF)
				reset_d(d);
		}
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif
		return (0);
	}
	flen = fp->bf_len;
	if (flen > bpf_maxinsns)
		return (EINVAL);

	size = flen * sizeof(*fp->bf_insns);
	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = fcode;
		else {
			d->bd_rfilter = fcode;
#ifdef BPF_JITTER
			d->bd_bfilter = bpf_jitter(fcode, flen);
#endif
			if (cmd == BIOCSETF)
				reset_d(d);
		}
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif

		return (0);
	}
	free((caddr_t)fcode, M_BPF);
	return (EINVAL);
}
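
/*
 * Illustrative sketch of a read filter a consumer might install via
 * BIOCSETF (a sketch only, assuming the descriptor is bound to a
 * DLT_EN10MB interface): accept IPv4 frames in full, drop everything else.
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),	// load ethertype
 *		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0x0800, 0, 1),
 *		BPF_STMT(BPF_RET+BPF_K, (u_int)-1),	// accept whole packet
 *		BPF_STMT(BPF_RET+BPF_K, 0),		// drop
 *	};
 *	struct bpf_program prog = { 4, insns };
 *
 *	ioctl(fd, BIOCSETF, &prog);	// BIOCSETF also flushes via reset_d()
 */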

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	struct ifnet *theywant;

	theywant = ifunit(ifr->ifr_name);
	if (theywant == NULL || theywant->if_bpf == NULL)
		return (ENXIO);

	bp = theywant->if_bpf;

	/*
	 * Behavior here depends on the buffering model.  If we're using
	 * kernel memory buffers, then we can allocate them here.  If we're
	 * using zero-copy, then the user process must have registered
	 * buffers by the time we get here.  If not, return an error.
	 *
	 * XXXRW: There are locking issues here with multi-threaded use: what
	 * if two threads try to set the interface at once?
	 */
	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		if (d->bd_sbuf == NULL)
			bpf_buffer_alloc(d);
		KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
		break;

	case BPF_BUFMODE_ZBUF:
		if (d->bd_sbuf == NULL)
			return (EINVAL);
		break;

	default:
		panic("bpf_setif: bufmode %d", d->bd_bufmode);
	}
	if (bp != d->bd_bif) {
		if (d->bd_bif)
			/*
			 * Detach if attached to something else.
			 */
			bpf_detachd(d);

		bpf_attachd(d, bp);
	}
	BPFD_LOCK(d);
	reset_d(d);
	BPFD_UNLOCK(d);
	return (0);
}

/*
 * Support for select() and poll() system calls
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
static int
bpfpoll(struct cdev *dev, int events, struct thread *td)
{
	struct bpf_d *d;
	int revents;

	if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
		return (events &
		    (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));

	/*
	 * Refresh PID associated with this descriptor.
	 */
	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (events & (POLLIN | POLLRDNORM)) {
		if (bpf_ready(d))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}

/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 */
static int
bpfkqfilter(struct cdev *dev, struct knote *kn)
{
	struct bpf_d *d;

	if (devfs_get_cdevpriv((void **)&d) != 0 ||
	    kn->kn_filter != EVFILT_READ)
		return (1);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = curthread->td_proc->p_pid;
	kn->kn_fop = &bpfread_filtops;
	kn->kn_hook = d;
	knlist_add(&d->bd_sel.si_note, kn, 1);
	BPFD_UNLOCK(d);

	return (0);
}
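
/*
 * Illustrative userland sketch (a sketch only): since just EVFILT_READ is
 * accepted, a consumer registers the descriptor for read events and sleeps
 * in kevent(2) until bpf_ready() above holds:
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);	// register
 *	kevent(kq, NULL, 0, &kev, 1, NULL);	// wait; kev.data = bytes ready
 */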

static void
filt_bpfdetach(struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	knlist_remove(&d->bd_sel.si_note, kn, 0);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int ready;

	BPFD_LOCK_ASSERT(d);
	ready = bpf_ready(d);
	if (ready) {
		kn->kn_data = d->bd_slen;
		if (d->bd_hbuf)
			kn->kn_data += d->bd_hlen;
	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		callout_reset(&d->bd_callout, d->bd_rtout,
		    bpf_timed_out, d);
		d->bd_state = BPF_WAITING;
	}

	return (ready);
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct bpf_d *d;
#ifdef BPF_JITTER
	bpf_jit_filter *bf;
#endif
	u_int slen;
	int gottime;
	struct timeval tv;

	gottime = 0;
	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPFD_LOCK(d);
		++d->bd_rcount;
		/*
		 * NB: We don't call BPF_CHECK_DIRECTION() here since there is
		 * no way for the caller to indicate to us whether this packet
		 * is inbound or outbound.  In the bpf_mtap() routines, we use
		 * the interface pointers on the mbuf to figure it out.
		 */
#ifdef BPF_JITTER
		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
		if (bf != NULL)
			slen = (*(bf->func))(pkt, pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, pkt, pktlen, slen,
				    bpf_append_bytes, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

#define	BPF_CHECK_DIRECTION(d, r, i)				\
	    (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||	\
	    ((d)->bd_direction == BPF_D_OUT && (r) == (i)))

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	struct bpf_d *d;
#ifdef BPF_JITTER
	bpf_jit_filter *bf;
#endif
	u_int pktlen, slen;
	int gottime;
	struct timeval tv;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	gottime = 0;

	pktlen = m_length(m, NULL);

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
		/* XXX We cannot handle multiple mbufs. */
		if (bf != NULL && m->m_next == NULL)
			slen = (*(bf->func))(mtod(m, u_char *), pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_append_mbuf, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Incoming linkage from device drivers, when packet is in
 * an mbuf chain and to be prepended by a contiguous header.
 */
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;
	struct timeval tv;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	gottime = 0;

	pktlen = m_length(m, NULL);
	/*
	 * Craft an on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only set up what's
	 * absolutely needed; this mbuf should never go anywhere else.
	 */
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_append_mbuf, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

#undef	BPF_CHECK_DIRECTION

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_append_mbuf is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
    struct timeval *tv)
{
	struct bpf_hdr hdr;
#ifdef COMPAT_FREEBSD32
	struct bpf_hdr32 hdr32;
#endif
	int totlen, curlen;
	int hdrlen = d->bd_bif->bif_hdrlen;
	int do_wakeup = 0;

	BPFD_LOCK_ASSERT(d);

	/*
	 * Detect whether user space has released a buffer back to us, and if
	 * so, move it from being a hold buffer to a free buffer.  This may
	 * not be the best place to do it (for example, we might only want to
	 * run this check if we need the space), but for now it's a reliable
	 * spot to do it.
	 */
	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
		bpf_buf_reclaimed(d);
	}

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater than or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 *
	 * Drop the packet if there's no room and no hope of room.
	 * If the packet would overflow the storage buffer or the storage
	 * buffer is considered immutable by the buffer model, try to rotate
	 * the buffer and wake up pending processes.
	 */
#ifdef COMPAT_FREEBSD32
	if (d->bd_compat32)
		curlen = BPF_WORDALIGN32(d->bd_slen);
	else
#endif
		curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
		if (d->bd_fbuf == NULL) {
			/*
			 * There's no room in the store buffer, and no
			 * prospect of room, so drop the packet.  Notify the
			 * buffer model.
			 */
			bpf_buffull(d);
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has already
		 * expired during a select call.  A packet arrived, so the
		 * reader should be woken up.
		 */
		do_wakeup = 1;
#ifdef COMPAT_FREEBSD32
	/*
	 * If this is a 32-bit stream, then stick a 32-bit header at the
	 * front and copy the data into the buffer.
	 */
	if (d->bd_compat32) {
		bzero(&hdr32, sizeof(hdr32));
		hdr32.bh_tstamp.tv_sec = tv->tv_sec;
		hdr32.bh_tstamp.tv_usec = tv->tv_usec;
		hdr32.bh_datalen = pktlen;
		hdr32.bh_hdrlen = hdrlen;
		hdr.bh_caplen = hdr32.bh_caplen = totlen - hdrlen;
		bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32, sizeof(hdr32));
		goto copy;
	}
#endif

	/*
	 * Append the bpf header.  Note we append the actual header size, but
	 * move forward the length of the header plus padding.
	 */
	bzero(&hdr, sizeof(hdr));
	hdr.bh_tstamp = *tv;
	hdr.bh_datalen = pktlen;
	hdr.bh_hdrlen = hdrlen;
	hdr.bh_caplen = totlen - hdrlen;
	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
#ifdef COMPAT_FREEBSD32
 copy:
#endif
	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen);
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{

	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it hasn't yet been marked
	 * free.
	 */
	bpf_free(d);
	if (d->bd_rfilter != NULL) {
		free((caddr_t)d->bd_rfilter, M_BPF);
#ifdef BPF_JITTER
		if (d->bd_bfilter != NULL)
			bpf_destroy_jit_filter(d->bd_bfilter);
#endif
	}
	if (d->bd_wfilter != NULL)
		free((caddr_t)d->bd_wfilter, M_BPF);
	mtx_destroy(&d->bd_mtx);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
 */
void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;

	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
	if (bp == NULL)
		panic("bpfattach");

	LIST_INIT(&bp->bif_dlist);
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
	*driverp = bp;

	mtx_lock(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
	mtx_unlock(&bpf_mtx);

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	if (bootverbose)
		if_printf(ifp, "bpf attached\n");
}
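
/*
 * Illustrative driver-side sketch (hedged: ordinary Ethernet drivers
 * inherit this from ether_ifattach() rather than calling it directly).
 * A driver exposing raw Ethernet frames would attach as
 *
 *	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
 *
 * and then hand frames to the taps with BPF_MTAP(ifp, m) on receive and
 * transmit.
 */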

/*
 * Detach bpf from an interface.  This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL.  Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if	*bp;
	struct bpf_d	*d;

	/* Locate BPF interface information */
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (ifp == bp->bif_ifp)
			break;
	}

	/* Interface wasn't attached */
	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
		mtx_unlock(&bpf_mtx);
		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
		return;
	}

	LIST_REMOVE(bp, bif_next);
	mtx_unlock(&bpf_mtx);

	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
		bpf_detachd(d);
		BPFD_LOCK(d);
		bpf_wakeup(d);
		BPFD_UNLOCK(d);
	}

	mtx_destroy(&bp->bif_mtx);
	free(bp, M_BPF);
}

/*
 * Get a list of the available data link types for the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len) {
				mtx_unlock(&bpf_mtx);
				return (ENOMEM);
			}
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	mtx_unlock(&bpf_mtx);
	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	mtx_unlock(&bpf_mtx);
	if (bp != NULL) {
		opromisc = d->bd_promisc;
		bpf_detachd(d);
		bpf_attachd(d, bp);
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		if (opromisc) {
			error = ifpromisc(bp->bif_ifp, 1);
			if (error)
				if_printf(bp->bif_ifp,
					"bpf_setdlt: ifpromisc failed (%d)\n",
					error);
			else
				d->bd_promisc = 1;
		}
	}
	return (bp == NULL ? EINVAL : 0);
}

static void
bpf_drvinit(void *unused)
{
	struct cdev *dev;

	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
	LIST_INIT(&bpf_iflist);

	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
	/* For compatibility */
	make_dev_alias(dev, "bpf0");
}

/*
 * Zero out the various packet counters associated with all of the bpf
 * descriptors.  At some point, we will probably want to get a bit more
 * granular and allow the user to specify descriptors to be zeroed.
 */
static void
bpf_zero_counters(void)
{
	struct bpf_if *bp;
	struct bpf_d *bd;

	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_LOCK(bp);
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			BPFD_LOCK(bd);
			bd->bd_rcount = 0;
			bd->bd_dcount = 0;
			bd->bd_fcount = 0;
			bd->bd_wcount = 0;
			bd->bd_wfcount = 0;
			bd->bd_zcopy = 0;
			BPFD_UNLOCK(bd);
		}
		BPFIF_UNLOCK(bp);
	}
	mtx_unlock(&bpf_mtx);
}

static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{

	bzero(d, sizeof(*d));
	BPFD_LOCK_ASSERT(bd);
	d->bd_structsize = sizeof(*d);
	d->bd_immediate = bd->bd_immediate;
	d->bd_promisc = bd->bd_promisc;
	d->bd_hdrcmplt = bd->bd_hdrcmplt;
	d->bd_direction = bd->bd_direction;
	d->bd_feedback = bd->bd_feedback;
	d->bd_async = bd->bd_async;
	d->bd_rcount = bd->bd_rcount;
	d->bd_dcount = bd->bd_dcount;
	d->bd_fcount = bd->bd_fcount;
	d->bd_sig = bd->bd_sig;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname,
	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
	d->bd_locked = bd->bd_locked;
	d->bd_wcount = bd->bd_wcount;
	d->bd_wdcount = bd->bd_wdcount;
	d->bd_wfcount = bd->bd_wfcount;
	d->bd_zcopy = bd->bd_zcopy;
	d->bd_bufmode = bd->bd_bufmode;
}

static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct xbpf_d *xbdbuf, *xbd, zerostats;
	int index, error;
	struct bpf_if *bp;
	struct bpf_d *bd;

	/*
	 * XXX This is not technically correct.  It is possible for
	 * unprivileged users to open bpf devices.  It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = priv_check(req->td, PRIV_NET_BPF);
	if (error)
		return (error);
	/*
	 * Check to see if the user is requesting that the counters be
	 * zeroed out.  Explicitly check that the supplied data is zeroed,
	 * as we aren't allowing the user to set the counters currently.
	 */
	if (req->newptr != NULL) {
		if (req->newlen != sizeof(zerostats))
			return (EINVAL);
		bzero(&zerostats, sizeof(zerostats));
		xbd = req->newptr;
		if (bcmp(xbd, &zerostats, sizeof(*xbd)) != 0)
			return (EINVAL);
		bpf_zero_counters();
		return (0);
	}
	if (req->oldptr == NULL)
		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
	if (bpf_bpfd_cnt == 0)
		return (SYSCTL_OUT(req, 0, 0));
	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
	mtx_lock(&bpf_mtx);
	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
		mtx_unlock(&bpf_mtx);
		free(xbdbuf, M_BPF);
		return (ENOMEM);
	}
	index = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_LOCK(bp);
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
		BPFIF_UNLOCK(bp);
	}
	mtx_unlock(&bpf_mtx);
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}

SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL);

#else /* !DEV_BPF && !NETGRAPH_BPF */
/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */
static struct bpf_if bp_null;

void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
}

void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}

void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
}

void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{

	*driverp = &bp_null;
}

void
bpfdetach(struct ifnet *ifp)
{
}

u_int
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
{
	return (-1);	/* "no filter" behaviour */
}

int
bpf_validate(const struct bpf_insn *f, int len)
{
	return (0);		/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */
