bpf.c revision 172930
1/*-
2 * Copyright (c) 1990, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from the Stanford/CMU enet packet filter,
6 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
7 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
8 * Berkeley Laboratory.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
35 *
36 * $FreeBSD: head/sys/net/bpf.c 172930 2007-10-24 19:04:04Z rwatson $
37 */
38
39#include "opt_bpf.h"
40#include "opt_mac.h"
41#include "opt_netgraph.h"
42
43#include <sys/types.h>
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/conf.h>
47#include <sys/fcntl.h>
48#include <sys/malloc.h>
49#include <sys/mbuf.h>
50#include <sys/time.h>
51#include <sys/priv.h>
52#include <sys/proc.h>
53#include <sys/signalvar.h>
54#include <sys/filio.h>
55#include <sys/sockio.h>
56#include <sys/ttycom.h>
57#include <sys/uio.h>
58
59#include <sys/event.h>
60#include <sys/file.h>
61#include <sys/poll.h>
62#include <sys/proc.h>
63
64#include <sys/socket.h>
65
66#include <net/if.h>
67#include <net/bpf.h>
68#ifdef BPF_JITTER
69#include <net/bpf_jitter.h>
70#endif
71#include <net/bpfdesc.h>
72
73#include <netinet/in.h>
74#include <netinet/if_ether.h>
75#include <sys/kernel.h>
76#include <sys/sysctl.h>
77
78#include <net80211/ieee80211_freebsd.h>
79
80#include <security/mac/mac_framework.h>
81
82static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
83
84#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
85
86#define PRINET  26			/* interruptible */
87
88#define	M_SKIP_BPF	M_SKIP_FIREWALL
89
90/*
91 * bpf_iflist is a list of BPF interface structures, each corresponding to a
92 * specific DLT.  The same network interface might have several BPF interface
93 * structures registered by different layers in the stack (i.e., 802.11
94 * frames, ethernet frames, etc).
95 */
96static LIST_HEAD(, bpf_if)	bpf_iflist;
97static struct mtx	bpf_mtx;		/* bpf global lock */
98static int		bpf_bpfd_cnt;
99
100static void	bpf_allocbufs(struct bpf_d *);
101static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
102static void	bpf_detachd(struct bpf_d *);
103static void	bpf_freed(struct bpf_d *);
104static void	bpf_mcopy(const void *, void *, size_t);
105static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
106		    struct sockaddr *, int *, struct bpf_insn *);
107static int	bpf_setif(struct bpf_d *, struct ifreq *);
108static void	bpf_timed_out(void *);
109static __inline void
110		bpf_wakeup(struct bpf_d *);
111static void	catchpacket(struct bpf_d *, u_char *, u_int,
112		    u_int, void (*)(const void *, void *, size_t),
113		    struct timeval *);
114static void	reset_d(struct bpf_d *);
115static int	 bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
116static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
117static int	bpf_setdlt(struct bpf_d *, u_int);
118static void	filt_bpfdetach(struct knote *);
119static int	filt_bpfread(struct knote *, long);
120static void	bpf_drvinit(void *);
121static void	bpf_clone(void *, struct ucred *, char *, int, struct cdev **);
122static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
123
124SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
125static int bpf_bufsize = 4096;
126SYSCTL_INT(_net_bpf, OID_AUTO, bufsize, CTLFLAG_RW,
127    &bpf_bufsize, 0, "Default bpf buffer size");
128static int bpf_maxbufsize = BPF_MAXBUFSIZE;
129SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
130    &bpf_maxbufsize, 0, "Maximum bpf buffer size");
131static int bpf_maxinsns = BPF_MAXINSNS;
132SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
133    &bpf_maxinsns, 0, "Maximum bpf program instructions");
134SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW,
135    bpf_stats_sysctl, "bpf statistics portal");
136
137static	d_open_t	bpfopen;
138static	d_close_t	bpfclose;
139static	d_read_t	bpfread;
140static	d_write_t	bpfwrite;
141static	d_ioctl_t	bpfioctl;
142static	d_poll_t	bpfpoll;
143static	d_kqfilter_t	bpfkqfilter;
144
145static struct cdevsw bpf_cdevsw = {
146	.d_version =	D_VERSION,
147	.d_open =	bpfopen,
148	.d_close =	bpfclose,
149	.d_read =	bpfread,
150	.d_write =	bpfwrite,
151	.d_ioctl =	bpfioctl,
152	.d_poll =	bpfpoll,
153	.d_name =	"bpf",
154	.d_kqfilter =	bpfkqfilter,
155};
156
157static struct filterops bpfread_filtops =
158	{ 1, NULL, filt_bpfdetach, filt_bpfread };
159
160static int
161bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
162    struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
163{
164	const struct ieee80211_bpf_params *p;
165	struct ether_header *eh;
166	struct mbuf *m;
167	int error;
168	int len;
169	int hlen;
170	int slen;
171
172	/*
173	 * Build a sockaddr based on the data link layer type.
174	 * We do this at this level because the ethernet header
175	 * is copied directly into the data field of the sockaddr.
176	 * In the case of SLIP, there is no header and the packet
177	 * is forwarded as is.
178	 * Also, we are careful to leave room at the front of the mbuf
179	 * for the link level header.
180	 */
181	switch (linktype) {
182
183	case DLT_SLIP:
184		sockp->sa_family = AF_INET;
185		hlen = 0;
186		break;
187
188	case DLT_EN10MB:
189		sockp->sa_family = AF_UNSPEC;
190		/* XXX Would MAXLINKHDR be better? */
191		hlen = ETHER_HDR_LEN;
192		break;
193
194	case DLT_FDDI:
195		sockp->sa_family = AF_IMPLINK;
196		hlen = 0;
197		break;
198
199	case DLT_RAW:
200		sockp->sa_family = AF_UNSPEC;
201		hlen = 0;
202		break;
203
204	case DLT_NULL:
205		/*
206		 * null interface types require a 4 byte pseudo header which
207		 * corresponds to the address family of the packet.
208		 */
209		sockp->sa_family = AF_UNSPEC;
210		hlen = 4;
211		break;
212
213	case DLT_ATM_RFC1483:
214		/*
215		 * en atm driver requires 4-byte atm pseudo header.
216		 * though it isn't standard, vpi:vci needs to be
217		 * specified anyway.
218		 */
219		sockp->sa_family = AF_UNSPEC;
220		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
221		break;
222
223	case DLT_PPP:
224		sockp->sa_family = AF_UNSPEC;
225		hlen = 4;	/* This should match PPP_HDRLEN */
226		break;
227
228	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
229		sockp->sa_family = AF_IEEE80211;
230		hlen = 0;
231		break;
232
233	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
234		sockp->sa_family = AF_IEEE80211;
235		sockp->sa_len = 12;	/* XXX != 0 */
236		hlen = sizeof(struct ieee80211_bpf_params);
237		break;
238
239	default:
240		return (EIO);
241	}
242
243	len = uio->uio_resid;
244
245	if (len - hlen > ifp->if_mtu)
246		return (EMSGSIZE);
247
248	if ((unsigned)len > MCLBYTES)
249		return (EIO);
250
251	if (len > MHLEN) {
252		m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR);
253	} else {
254		MGETHDR(m, M_TRYWAIT, MT_DATA);
255	}
256	if (m == NULL)
257		return (ENOBUFS);
258	m->m_pkthdr.len = m->m_len = len;
259	m->m_pkthdr.rcvif = NULL;
260	*mp = m;
261
262	if (m->m_len < hlen) {
263		error = EPERM;
264		goto bad;
265	}
266
267	error = uiomove(mtod(m, u_char *), len, uio);
268	if (error)
269		goto bad;
270
271	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
272	if (slen == 0) {
273		error = EPERM;
274		goto bad;
275	}
276
277	/* Check for multicast destination */
278	switch (linktype) {
279	case DLT_EN10MB:
280		eh = mtod(m, struct ether_header *);
281		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
282			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
283			    ETHER_ADDR_LEN) == 0)
284				m->m_flags |= M_BCAST;
285			else
286				m->m_flags |= M_MCAST;
287		}
288		break;
289	}
290
291	/*
292	 * Make room for link header, and copy it to sockaddr
293	 */
294	if (hlen != 0) {
295		if (sockp->sa_family == AF_IEEE80211) {
296			/*
297			 * Collect true length from the parameter header
298			 * NB: sockp is known to be zero'd so if we do a
299			 *     short copy unspecified parameters will be
300			 *     zero.
301			 * NB: packet may not be aligned after stripping
302			 *     bpf params
303			 * XXX check ibp_vers
304			 */
305			p = mtod(m, const struct ieee80211_bpf_params *);
306			hlen = p->ibp_len;
307			if (hlen > sizeof(sockp->sa_data)) {
308				error = EINVAL;
309				goto bad;
310			}
311		}
312		bcopy(m->m_data, sockp->sa_data, hlen);
313	}
314	*hdrlen = hlen;
315
316	return (0);
317bad:
318	m_freem(m);
319	return (error);
320}
321
322/*
323 * Attach file to the bpf interface, i.e. make d listen on bp.
324 */
325static void
326bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
327{
328	/*
329	 * Point d at bp, and add d to the interface's list of listeners.
330	 * Finally, point the driver's bpf cookie at the interface so
331	 * it will divert packets to bpf.
332	 */
333	BPFIF_LOCK(bp);
334	d->bd_bif = bp;
335	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
336
337	bpf_bpfd_cnt++;
338	BPFIF_UNLOCK(bp);
339}
340
341/*
342 * Detach a file from its interface.
343 */
344static void
345bpf_detachd(struct bpf_d *d)
346{
347	int error;
348	struct bpf_if *bp;
349	struct ifnet *ifp;
350
351	bp = d->bd_bif;
352	BPFIF_LOCK(bp);
353	BPFD_LOCK(d);
354	ifp = d->bd_bif->bif_ifp;
355
356	/*
357	 * Remove d from the interface's descriptor list.
358	 */
359	LIST_REMOVE(d, bd_next);
360
361	bpf_bpfd_cnt--;
362	d->bd_bif = NULL;
363	BPFD_UNLOCK(d);
364	BPFIF_UNLOCK(bp);
365
366	/*
367	 * Check if this descriptor had requested promiscuous mode.
368	 * If so, turn it off.
369	 */
370	if (d->bd_promisc) {
371		d->bd_promisc = 0;
372		error = ifpromisc(ifp, 0);
373		if (error != 0 && error != ENXIO) {
374			/*
375			 * ENXIO can happen if a pccard is unplugged
376			 * Something is really wrong if we were able to put
377			 * the driver into promiscuous mode, but can't
378			 * take it out.
379			 */
380			if_printf(bp->bif_ifp,
381				"bpf_detach: ifpromisc failed (%d)\n", error);
382		}
383	}
384}
385
386/*
387 * Open ethernet device.  Returns ENXIO for illegal minor device number,
388 * EBUSY if file is open by another process.
389 */
390/* ARGSUSED */
391static	int
392bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
393{
394	struct bpf_d *d;
395
396	mtx_lock(&bpf_mtx);
397	d = dev->si_drv1;
398	/*
399	 * Each minor can be opened by only one process.  If the requested
400	 * minor is in use, return EBUSY.
401	 */
402	if (d != NULL) {
403		mtx_unlock(&bpf_mtx);
404		return (EBUSY);
405	}
406	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
407	mtx_unlock(&bpf_mtx);
408
409	if ((dev->si_flags & SI_NAMED) == 0)
410		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
411		    "bpf%d", dev2unit(dev));
412	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
413	dev->si_drv1 = d;
414	d->bd_bufsize = bpf_bufsize;
415	d->bd_sig = SIGIO;
416	d->bd_direction = BPF_D_INOUT;
417	d->bd_pid = td->td_proc->p_pid;
418#ifdef MAC
419	mac_bpfdesc_init(d);
420	mac_bpfdesc_create(td->td_ucred, d);
421#endif
422	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
423	callout_init(&d->bd_callout, CALLOUT_MPSAFE);
424	knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);
425
426	return (0);
427}
428
429/*
430 * Close the descriptor by detaching it from its interface,
431 * deallocating its buffers, and marking it free.
432 */
433/* ARGSUSED */
434static	int
435bpfclose(struct cdev *dev, int flags, int fmt, struct thread *td)
436{
437	struct bpf_d *d = dev->si_drv1;
438
439	BPFD_LOCK(d);
440	if (d->bd_state == BPF_WAITING)
441		callout_stop(&d->bd_callout);
442	d->bd_state = BPF_IDLE;
443	BPFD_UNLOCK(d);
444	funsetown(&d->bd_sigio);
445	mtx_lock(&bpf_mtx);
446	if (d->bd_bif)
447		bpf_detachd(d);
448	mtx_unlock(&bpf_mtx);
449	selwakeuppri(&d->bd_sel, PRINET);
450#ifdef MAC
451	mac_bpfdesc_destroy(d);
452#endif /* MAC */
453	knlist_destroy(&d->bd_sel.si_note);
454	bpf_freed(d);
455	dev->si_drv1 = NULL;
456	free(d, M_BPF);
457
458	return (0);
459}
460
461
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be used safely in an unbraced if/else.
 * Invoke it with a trailing semicolon: ROTATE_BUFFERS(d);
 */
#define ROTATE_BUFFERS(d) do { \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL; \
} while (0)
473/*
474 *  bpfread - read next chunk of packets from buffers
475 */
476static	int
477bpfread(struct cdev *dev, struct uio *uio, int ioflag)
478{
479	struct bpf_d *d = dev->si_drv1;
480	int timed_out;
481	int error;
482
483	/*
484	 * Restrict application to use a buffer the same size as
485	 * as kernel buffers.
486	 */
487	if (uio->uio_resid != d->bd_bufsize)
488		return (EINVAL);
489
490	BPFD_LOCK(d);
491	d->bd_pid = curthread->td_proc->p_pid;
492	if (d->bd_state == BPF_WAITING)
493		callout_stop(&d->bd_callout);
494	timed_out = (d->bd_state == BPF_TIMED_OUT);
495	d->bd_state = BPF_IDLE;
496	/*
497	 * If the hold buffer is empty, then do a timed sleep, which
498	 * ends when the timeout expires or when enough packets
499	 * have arrived to fill the store buffer.
500	 */
501	while (d->bd_hbuf == NULL) {
502		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
503			/*
504			 * A packet(s) either arrived since the previous
505			 * read or arrived while we were asleep.
506			 * Rotate the buffers and return what's here.
507			 */
508			ROTATE_BUFFERS(d);
509			break;
510		}
511
512		/*
513		 * No data is available, check to see if the bpf device
514		 * is still pointed at a real interface.  If not, return
515		 * ENXIO so that the userland process knows to rebind
516		 * it before using it again.
517		 */
518		if (d->bd_bif == NULL) {
519			BPFD_UNLOCK(d);
520			return (ENXIO);
521		}
522
523		if (ioflag & O_NONBLOCK) {
524			BPFD_UNLOCK(d);
525			return (EWOULDBLOCK);
526		}
527		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
528		     "bpf", d->bd_rtout);
529		if (error == EINTR || error == ERESTART) {
530			BPFD_UNLOCK(d);
531			return (error);
532		}
533		if (error == EWOULDBLOCK) {
534			/*
535			 * On a timeout, return what's in the buffer,
536			 * which may be nothing.  If there is something
537			 * in the store buffer, we can rotate the buffers.
538			 */
539			if (d->bd_hbuf)
540				/*
541				 * We filled up the buffer in between
542				 * getting the timeout and arriving
543				 * here, so we don't need to rotate.
544				 */
545				break;
546
547			if (d->bd_slen == 0) {
548				BPFD_UNLOCK(d);
549				return (0);
550			}
551			ROTATE_BUFFERS(d);
552			break;
553		}
554	}
555	/*
556	 * At this point, we know we have something in the hold slot.
557	 */
558	BPFD_UNLOCK(d);
559
560	/*
561	 * Move data from hold buffer into user space.
562	 * We know the entire buffer is transferred since
563	 * we checked above that the read buffer is bpf_bufsize bytes.
564	 */
565	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);
566
567	BPFD_LOCK(d);
568	d->bd_fbuf = d->bd_hbuf;
569	d->bd_hbuf = NULL;
570	d->bd_hlen = 0;
571	BPFD_UNLOCK(d);
572
573	return (error);
574}
575
576
577/*
578 * If there are processes sleeping on this descriptor, wake them up.
579 */
580static __inline void
581bpf_wakeup(struct bpf_d *d)
582{
583
584	BPFD_LOCK_ASSERT(d);
585	if (d->bd_state == BPF_WAITING) {
586		callout_stop(&d->bd_callout);
587		d->bd_state = BPF_IDLE;
588	}
589	wakeup(d);
590	if (d->bd_async && d->bd_sig && d->bd_sigio)
591		pgsigio(&d->bd_sigio, d->bd_sig, 0);
592
593	selwakeuppri(&d->bd_sel, PRINET);
594	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
595}
596
597static void
598bpf_timed_out(void *arg)
599{
600	struct bpf_d *d = (struct bpf_d *)arg;
601
602	BPFD_LOCK(d);
603	if (d->bd_state == BPF_WAITING) {
604		d->bd_state = BPF_TIMED_OUT;
605		if (d->bd_slen != 0)
606			bpf_wakeup(d);
607	}
608	BPFD_UNLOCK(d);
609}
610
611static int
612bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
613{
614	struct bpf_d *d = dev->si_drv1;
615	struct ifnet *ifp;
616	struct mbuf *m, *mc;
617	struct sockaddr dst;
618	int error, hlen;
619
620	d->bd_pid = curthread->td_proc->p_pid;
621	if (d->bd_bif == NULL)
622		return (ENXIO);
623
624	ifp = d->bd_bif->bif_ifp;
625
626	if ((ifp->if_flags & IFF_UP) == 0)
627		return (ENETDOWN);
628
629	if (uio->uio_resid == 0)
630		return (0);
631
632	bzero(&dst, sizeof(dst));
633	m = NULL;
634	hlen = 0;
635	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
636	    &m, &dst, &hlen, d->bd_wfilter);
637	if (error)
638		return (error);
639
640	if (d->bd_hdrcmplt)
641		dst.sa_family = pseudo_AF_HDRCMPLT;
642
643	if (d->bd_feedback) {
644		mc = m_dup(m, M_DONTWAIT);
645		if (mc != NULL)
646			mc->m_pkthdr.rcvif = ifp;
647		/* XXX Do not return the same packet twice. */
648		if (d->bd_direction == BPF_D_INOUT)
649			m->m_flags |= M_SKIP_BPF;
650	} else
651		mc = NULL;
652
653	m->m_pkthdr.len -= hlen;
654	m->m_len -= hlen;
655	m->m_data += hlen;	/* XXX */
656
657#ifdef MAC
658	BPFD_LOCK(d);
659	mac_bpfdesc_create_mbuf(d, m);
660	if (mc != NULL)
661		mac_bpfdesc_create_mbuf(d, mc);
662	BPFD_UNLOCK(d);
663#endif
664
665	error = (*ifp->if_output)(ifp, m, &dst, NULL);
666
667	if (mc != NULL) {
668		if (error == 0)
669			(*ifp->if_input)(ifp, mc);
670		else
671			m_freem(mc);
672	}
673
674	return (error);
675}
676
677/*
678 * Reset a descriptor by flushing its packet buffer and clearing the
679 * receive and drop counts.
680 */
681static void
682reset_d(struct bpf_d *d)
683{
684
685	mtx_assert(&d->bd_mtx, MA_OWNED);
686	if (d->bd_hbuf) {
687		/* Free the hold buffer. */
688		d->bd_fbuf = d->bd_hbuf;
689		d->bd_hbuf = NULL;
690	}
691	d->bd_slen = 0;
692	d->bd_hlen = 0;
693	d->bd_rcount = 0;
694	d->bd_dcount = 0;
695	d->bd_fcount = 0;
696}
697
698/*
699 *  FIONREAD		Check for read packet available.
700 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
701 *  BIOCGBLEN		Get buffer len [for read()].
702 *  BIOCSETF		Set ethernet read filter.
703 *  BIOCSETWF		Set ethernet write filter.
704 *  BIOCFLUSH		Flush read packet buffer.
705 *  BIOCPROMISC		Put interface into promiscuous mode.
706 *  BIOCGDLT		Get link layer type.
707 *  BIOCGETIF		Get interface name.
708 *  BIOCSETIF		Set interface.
709 *  BIOCSRTIMEOUT	Set read timeout.
710 *  BIOCGRTIMEOUT	Get read timeout.
711 *  BIOCGSTATS		Get packet stats.
712 *  BIOCIMMEDIATE	Set immediate mode.
713 *  BIOCVERSION		Get filter language version.
714 *  BIOCGHDRCMPLT	Get "header already complete" flag
715 *  BIOCSHDRCMPLT	Set "header already complete" flag
716 *  BIOCGDIRECTION	Get packet direction flag
717 *  BIOCSDIRECTION	Set packet direction flag
718 *  BIOCLOCK		Set "locked" flag
719 *  BIOCFEEDBACK	Set packet feedback mode.
720 */
721/* ARGSUSED */
722static	int
723bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
724    struct thread *td)
725{
726	struct bpf_d *d = dev->si_drv1;
727	int error = 0;
728
729	/*
730	 * Refresh PID associated with this descriptor.
731	 */
732	BPFD_LOCK(d);
733	d->bd_pid = td->td_proc->p_pid;
734	if (d->bd_state == BPF_WAITING)
735		callout_stop(&d->bd_callout);
736	d->bd_state = BPF_IDLE;
737	BPFD_UNLOCK(d);
738
739	if (d->bd_locked == 1) {
740		switch (cmd) {
741		case BIOCGBLEN:
742		case BIOCFLUSH:
743		case BIOCGDLT:
744		case BIOCGDLTLIST:
745		case BIOCGETIF:
746		case BIOCGRTIMEOUT:
747		case BIOCGSTATS:
748		case BIOCVERSION:
749		case BIOCGRSIG:
750		case BIOCGHDRCMPLT:
751		case BIOCFEEDBACK:
752		case FIONREAD:
753		case BIOCLOCK:
754		case BIOCSRTIMEOUT:
755		case BIOCIMMEDIATE:
756		case TIOCGPGRP:
757			break;
758		default:
759			return (EPERM);
760		}
761	}
762	switch (cmd) {
763
764	default:
765		error = EINVAL;
766		break;
767
768	/*
769	 * Check for read packet available.
770	 */
771	case FIONREAD:
772		{
773			int n;
774
775			BPFD_LOCK(d);
776			n = d->bd_slen;
777			if (d->bd_hbuf)
778				n += d->bd_hlen;
779			BPFD_UNLOCK(d);
780
781			*(int *)addr = n;
782			break;
783		}
784
785	case SIOCGIFADDR:
786		{
787			struct ifnet *ifp;
788
789			if (d->bd_bif == NULL)
790				error = EINVAL;
791			else {
792				ifp = d->bd_bif->bif_ifp;
793				error = (*ifp->if_ioctl)(ifp, cmd, addr);
794			}
795			break;
796		}
797
798	/*
799	 * Get buffer len [for read()].
800	 */
801	case BIOCGBLEN:
802		*(u_int *)addr = d->bd_bufsize;
803		break;
804
805	/*
806	 * Set buffer length.
807	 */
808	case BIOCSBLEN:
809		if (d->bd_bif != NULL)
810			error = EINVAL;
811		else {
812			u_int size = *(u_int *)addr;
813
814			if (size > bpf_maxbufsize)
815				*(u_int *)addr = size = bpf_maxbufsize;
816			else if (size < BPF_MINBUFSIZE)
817				*(u_int *)addr = size = BPF_MINBUFSIZE;
818			d->bd_bufsize = size;
819		}
820		break;
821
822	/*
823	 * Set link layer read filter.
824	 */
825	case BIOCSETF:
826	case BIOCSETWF:
827		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
828		break;
829
830	/*
831	 * Flush read packet buffer.
832	 */
833	case BIOCFLUSH:
834		BPFD_LOCK(d);
835		reset_d(d);
836		BPFD_UNLOCK(d);
837		break;
838
839	/*
840	 * Put interface into promiscuous mode.
841	 */
842	case BIOCPROMISC:
843		if (d->bd_bif == NULL) {
844			/*
845			 * No interface attached yet.
846			 */
847			error = EINVAL;
848			break;
849		}
850		if (d->bd_promisc == 0) {
851			error = ifpromisc(d->bd_bif->bif_ifp, 1);
852			if (error == 0)
853				d->bd_promisc = 1;
854		}
855		break;
856
857	/*
858	 * Get current data link type.
859	 */
860	case BIOCGDLT:
861		if (d->bd_bif == NULL)
862			error = EINVAL;
863		else
864			*(u_int *)addr = d->bd_bif->bif_dlt;
865		break;
866
867	/*
868	 * Get a list of supported data link types.
869	 */
870	case BIOCGDLTLIST:
871		if (d->bd_bif == NULL)
872			error = EINVAL;
873		else
874			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
875		break;
876
877	/*
878	 * Set data link type.
879	 */
880	case BIOCSDLT:
881		if (d->bd_bif == NULL)
882			error = EINVAL;
883		else
884			error = bpf_setdlt(d, *(u_int *)addr);
885		break;
886
887	/*
888	 * Get interface name.
889	 */
890	case BIOCGETIF:
891		if (d->bd_bif == NULL)
892			error = EINVAL;
893		else {
894			struct ifnet *const ifp = d->bd_bif->bif_ifp;
895			struct ifreq *const ifr = (struct ifreq *)addr;
896
897			strlcpy(ifr->ifr_name, ifp->if_xname,
898			    sizeof(ifr->ifr_name));
899		}
900		break;
901
902	/*
903	 * Set interface.
904	 */
905	case BIOCSETIF:
906		error = bpf_setif(d, (struct ifreq *)addr);
907		break;
908
909	/*
910	 * Set read timeout.
911	 */
912	case BIOCSRTIMEOUT:
913		{
914			struct timeval *tv = (struct timeval *)addr;
915
916			/*
917			 * Subtract 1 tick from tvtohz() since this isn't
918			 * a one-shot timer.
919			 */
920			if ((error = itimerfix(tv)) == 0)
921				d->bd_rtout = tvtohz(tv) - 1;
922			break;
923		}
924
925	/*
926	 * Get read timeout.
927	 */
928	case BIOCGRTIMEOUT:
929		{
930			struct timeval *tv = (struct timeval *)addr;
931
932			tv->tv_sec = d->bd_rtout / hz;
933			tv->tv_usec = (d->bd_rtout % hz) * tick;
934			break;
935		}
936
937	/*
938	 * Get packet stats.
939	 */
940	case BIOCGSTATS:
941		{
942			struct bpf_stat *bs = (struct bpf_stat *)addr;
943
944			bs->bs_recv = d->bd_rcount;
945			bs->bs_drop = d->bd_dcount;
946			break;
947		}
948
949	/*
950	 * Set immediate mode.
951	 */
952	case BIOCIMMEDIATE:
953		d->bd_immediate = *(u_int *)addr;
954		break;
955
956	case BIOCVERSION:
957		{
958			struct bpf_version *bv = (struct bpf_version *)addr;
959
960			bv->bv_major = BPF_MAJOR_VERSION;
961			bv->bv_minor = BPF_MINOR_VERSION;
962			break;
963		}
964
965	/*
966	 * Get "header already complete" flag
967	 */
968	case BIOCGHDRCMPLT:
969		*(u_int *)addr = d->bd_hdrcmplt;
970		break;
971
972	/*
973	 * Set "header already complete" flag
974	 */
975	case BIOCSHDRCMPLT:
976		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
977		break;
978
979	/*
980	 * Get packet direction flag
981	 */
982	case BIOCGDIRECTION:
983		*(u_int *)addr = d->bd_direction;
984		break;
985
986	/*
987	 * Set packet direction flag
988	 */
989	case BIOCSDIRECTION:
990		{
991			u_int	direction;
992
993			direction = *(u_int *)addr;
994			switch (direction) {
995			case BPF_D_IN:
996			case BPF_D_INOUT:
997			case BPF_D_OUT:
998				d->bd_direction = direction;
999				break;
1000			default:
1001				error = EINVAL;
1002			}
1003		}
1004		break;
1005
1006	case BIOCFEEDBACK:
1007		d->bd_feedback = *(u_int *)addr;
1008		break;
1009
1010	case BIOCLOCK:
1011		d->bd_locked = 1;
1012		break;
1013
1014	case FIONBIO:		/* Non-blocking I/O */
1015		break;
1016
1017	case FIOASYNC:		/* Send signal on receive packets */
1018		d->bd_async = *(int *)addr;
1019		break;
1020
1021	case FIOSETOWN:
1022		error = fsetown(*(int *)addr, &d->bd_sigio);
1023		break;
1024
1025	case FIOGETOWN:
1026		*(int *)addr = fgetown(&d->bd_sigio);
1027		break;
1028
1029	/* This is deprecated, FIOSETOWN should be used instead. */
1030	case TIOCSPGRP:
1031		error = fsetown(-(*(int *)addr), &d->bd_sigio);
1032		break;
1033
1034	/* This is deprecated, FIOGETOWN should be used instead. */
1035	case TIOCGPGRP:
1036		*(int *)addr = -fgetown(&d->bd_sigio);
1037		break;
1038
1039	case BIOCSRSIG:		/* Set receive signal */
1040		{
1041			u_int sig;
1042
1043			sig = *(u_int *)addr;
1044
1045			if (sig >= NSIG)
1046				error = EINVAL;
1047			else
1048				d->bd_sig = sig;
1049			break;
1050		}
1051	case BIOCGRSIG:
1052		*(u_int *)addr = d->bd_sig;
1053		break;
1054	}
1055	return (error);
1056}
1057
1058/*
1059 * Set d's packet filter program to fp.  If this file already has a filter,
1060 * free it and replace it.  Returns EINVAL for bogus requests.
1061 */
1062static int
1063bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
1064{
1065	struct bpf_insn *fcode, *old;
1066	u_int wfilter, flen, size;
1067#ifdef BPF_JITTER
1068	bpf_jit_filter *ofunc;
1069#endif
1070
1071	if (cmd == BIOCSETWF) {
1072		old = d->bd_wfilter;
1073		wfilter = 1;
1074#ifdef BPF_JITTER
1075		ofunc = NULL;
1076#endif
1077	} else {
1078		wfilter = 0;
1079		old = d->bd_rfilter;
1080#ifdef BPF_JITTER
1081		ofunc = d->bd_bfilter;
1082#endif
1083	}
1084	if (fp->bf_insns == NULL) {
1085		if (fp->bf_len != 0)
1086			return (EINVAL);
1087		BPFD_LOCK(d);
1088		if (wfilter)
1089			d->bd_wfilter = NULL;
1090		else {
1091			d->bd_rfilter = NULL;
1092#ifdef BPF_JITTER
1093			d->bd_bfilter = NULL;
1094#endif
1095		}
1096		reset_d(d);
1097		BPFD_UNLOCK(d);
1098		if (old != NULL)
1099			free((caddr_t)old, M_BPF);
1100#ifdef BPF_JITTER
1101		if (ofunc != NULL)
1102			bpf_destroy_jit_filter(ofunc);
1103#endif
1104		return (0);
1105	}
1106	flen = fp->bf_len;
1107	if (flen > bpf_maxinsns)
1108		return (EINVAL);
1109
1110	size = flen * sizeof(*fp->bf_insns);
1111	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
1112	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
1113	    bpf_validate(fcode, (int)flen)) {
1114		BPFD_LOCK(d);
1115		if (wfilter)
1116			d->bd_wfilter = fcode;
1117		else {
1118			d->bd_rfilter = fcode;
1119#ifdef BPF_JITTER
1120			d->bd_bfilter = bpf_jitter(fcode, flen);
1121#endif
1122		}
1123		reset_d(d);
1124		BPFD_UNLOCK(d);
1125		if (old != NULL)
1126			free((caddr_t)old, M_BPF);
1127#ifdef BPF_JITTER
1128		if (ofunc != NULL)
1129			bpf_destroy_jit_filter(ofunc);
1130#endif
1131
1132		return (0);
1133	}
1134	free((caddr_t)fcode, M_BPF);
1135	return (EINVAL);
1136}
1137
1138/*
1139 * Detach a file from its current interface (if attached at all) and attach
1140 * to the interface indicated by the name stored in ifr.
1141 * Return an errno or 0.
1142 */
1143static int
1144bpf_setif(struct bpf_d *d, struct ifreq *ifr)
1145{
1146	struct bpf_if *bp;
1147	struct ifnet *theywant;
1148
1149	theywant = ifunit(ifr->ifr_name);
1150	if (theywant == NULL || theywant->if_bpf == NULL)
1151		return (ENXIO);
1152
1153	bp = theywant->if_bpf;
1154	/*
1155	 * Allocate the packet buffers if we need to.
1156	 * If we're already attached to requested interface,
1157	 * just flush the buffer.
1158	 */
1159	if (d->bd_sbuf == NULL)
1160		bpf_allocbufs(d);
1161	if (bp != d->bd_bif) {
1162		if (d->bd_bif)
1163			/*
1164			 * Detach if attached to something else.
1165			 */
1166			bpf_detachd(d);
1167
1168		bpf_attachd(d, bp);
1169	}
1170	BPFD_LOCK(d);
1171	reset_d(d);
1172	BPFD_UNLOCK(d);
1173	return (0);
1174}
1175
1176/*
1177 * Support for select() and poll() system calls
1178 *
1179 * Return true iff the specific operation will not block indefinitely.
1180 * Otherwise, return false but make a note that a selwakeup() must be done.
1181 */
1182static int
1183bpfpoll(struct cdev *dev, int events, struct thread *td)
1184{
1185	struct bpf_d *d;
1186	int revents;
1187
1188	d = dev->si_drv1;
1189	if (d->bd_bif == NULL)
1190		return (ENXIO);
1191
1192	/*
1193	 * Refresh PID associated with this descriptor.
1194	 */
1195	revents = events & (POLLOUT | POLLWRNORM);
1196	BPFD_LOCK(d);
1197	d->bd_pid = td->td_proc->p_pid;
1198	if (events & (POLLIN | POLLRDNORM)) {
1199		if (bpf_ready(d))
1200			revents |= events & (POLLIN | POLLRDNORM);
1201		else {
1202			selrecord(td, &d->bd_sel);
1203			/* Start the read timeout if necessary. */
1204			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1205				callout_reset(&d->bd_callout, d->bd_rtout,
1206				    bpf_timed_out, d);
1207				d->bd_state = BPF_WAITING;
1208			}
1209		}
1210	}
1211	BPFD_UNLOCK(d);
1212	return (revents);
1213}
1214
1215/*
1216 * Support for kevent() system call.  Register EVFILT_READ filters and
1217 * reject all others.
1218 */
1219int
1220bpfkqfilter(struct cdev *dev, struct knote *kn)
1221{
1222	struct bpf_d *d = (struct bpf_d *)dev->si_drv1;
1223
1224	if (kn->kn_filter != EVFILT_READ)
1225		return (1);
1226
1227	/*
1228	 * Refresh PID associated with this descriptor.
1229	 */
1230	BPFD_LOCK(d);
1231	d->bd_pid = curthread->td_proc->p_pid;
1232	kn->kn_fop = &bpfread_filtops;
1233	kn->kn_hook = d;
1234	knlist_add(&d->bd_sel.si_note, kn, 1);
1235	BPFD_UNLOCK(d);
1236
1237	return (0);
1238}
1239
1240static void
1241filt_bpfdetach(struct knote *kn)
1242{
1243	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1244
1245	knlist_remove(&d->bd_sel.si_note, kn, 0);
1246}
1247
1248static int
1249filt_bpfread(struct knote *kn, long hint)
1250{
1251	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1252	int ready;
1253
1254	BPFD_LOCK_ASSERT(d);
1255	ready = bpf_ready(d);
1256	if (ready) {
1257		kn->kn_data = d->bd_slen;
1258		if (d->bd_hbuf)
1259			kn->kn_data += d->bd_hlen;
1260	}
1261	else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1262		callout_reset(&d->bd_callout, d->bd_rtout,
1263		    bpf_timed_out, d);
1264		d->bd_state = BPF_WAITING;
1265	}
1266
1267	return (ready);
1268}
1269
1270/*
1271 * Incoming linkage from device drivers.  Process the packet pkt, of length
1272 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
1273 * by each process' filter, and if accepted, stashed into the corresponding
1274 * buffer.
1275 */
1276void
1277bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
1278{
1279	struct bpf_d *d;
1280	u_int slen;
1281	int gottime;
1282	struct timeval tv;
1283
1284	gottime = 0;
1285	BPFIF_LOCK(bp);
1286	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1287		BPFD_LOCK(d);
1288		++d->bd_rcount;
1289#ifdef BPF_JITTER
1290		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL)
1291			slen = (*(d->bd_bfilter->func))(pkt, pktlen, pktlen);
1292		else
1293#endif
1294		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
1295		if (slen != 0) {
1296			d->bd_fcount++;
1297			if (!gottime) {
1298				microtime(&tv);
1299				gottime = 1;
1300			}
1301#ifdef MAC
1302			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
1303#endif
1304				catchpacket(d, pkt, pktlen, slen, bcopy, &tv);
1305		}
1306		BPFD_UNLOCK(d);
1307	}
1308	BPFIF_UNLOCK(bp);
1309}
1310
1311/*
1312 * Copy data from an mbuf chain into a buffer.  This code is derived
1313 * from m_copydata in sys/uipc_mbuf.c.
1314 */
1315static void
1316bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
1317{
1318	const struct mbuf *m;
1319	u_int count;
1320	u_char *dst;
1321
1322	m = src_arg;
1323	dst = dst_arg;
1324	while (len > 0) {
1325		if (m == NULL)
1326			panic("bpf_mcopy");
1327		count = min(m->m_len, len);
1328		bcopy(mtod(m, void *), dst, count);
1329		m = m->m_next;
1330		dst += count;
1331		len -= count;
1332	}
1333}
1334
/*
 * Expands to a bare `if (...)' with no body: the statement written right
 * after the macro invocation runs when descriptor d should not see mbuf m,
 * i.e. d is BPF_D_IN and m has no receive interface recorded, or d is
 * BPF_D_OUT and m has one.
 */
#define	BPF_CHECK_DIRECTION(d, m)					\
	if (((d)->bd_direction == BPF_D_IN &&				\
	    (m)->m_pkthdr.rcvif == NULL) ||				\
	    ((d)->bd_direction == BPF_D_OUT &&				\
	    (m)->m_pkthdr.rcvif != NULL))
1338
1339/*
1340 * Incoming linkage from device drivers, when packet is in an mbuf chain.
1341 */
1342void
1343bpf_mtap(struct bpf_if *bp, struct mbuf *m)
1344{
1345	struct bpf_d *d;
1346	u_int pktlen, slen;
1347	int gottime;
1348	struct timeval tv;
1349
1350	if (m->m_flags & M_SKIP_BPF) {
1351		m->m_flags &= ~M_SKIP_BPF;
1352		return;
1353	}
1354
1355	gottime = 0;
1356
1357	pktlen = m_length(m, NULL);
1358
1359	BPFIF_LOCK(bp);
1360	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1361		BPF_CHECK_DIRECTION(d, m)
1362			continue;
1363		BPFD_LOCK(d);
1364		++d->bd_rcount;
1365#ifdef BPF_JITTER
1366		/* XXX We cannot handle multiple mbufs. */
1367		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL &&
1368		    m->m_next == NULL)
1369			slen = (*(d->bd_bfilter->func))(mtod(m, u_char *),
1370			    pktlen, pktlen);
1371		else
1372#endif
1373		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
1374		if (slen != 0) {
1375			d->bd_fcount++;
1376			if (!gottime) {
1377				microtime(&tv);
1378				gottime = 1;
1379			}
1380#ifdef MAC
1381			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
1382#endif
1383				catchpacket(d, (u_char *)m, pktlen, slen,
1384				    bpf_mcopy, &tv);
1385		}
1386		BPFD_UNLOCK(d);
1387	}
1388	BPFIF_UNLOCK(bp);
1389}
1390
1391/*
1392 * Incoming linkage from device drivers, when packet is in
1393 * an mbuf chain and to be prepended by a contiguous header.
1394 */
1395void
1396bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
1397{
1398	struct mbuf mb;
1399	struct bpf_d *d;
1400	u_int pktlen, slen;
1401	int gottime;
1402	struct timeval tv;
1403
1404	if (m->m_flags & M_SKIP_BPF) {
1405		m->m_flags &= ~M_SKIP_BPF;
1406		return;
1407	}
1408
1409	gottime = 0;
1410
1411	pktlen = m_length(m, NULL);
1412	/*
1413	 * Craft on-stack mbuf suitable for passing to bpf_filter.
1414	 * Note that we cut corners here; we only setup what's
1415	 * absolutely needed--this mbuf should never go anywhere else.
1416	 */
1417	mb.m_next = m;
1418	mb.m_data = data;
1419	mb.m_len = dlen;
1420	pktlen += dlen;
1421
1422	BPFIF_LOCK(bp);
1423	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1424		BPF_CHECK_DIRECTION(d, m)
1425			continue;
1426		BPFD_LOCK(d);
1427		++d->bd_rcount;
1428		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
1429		if (slen != 0) {
1430			d->bd_fcount++;
1431			if (!gottime) {
1432				microtime(&tv);
1433				gottime = 1;
1434			}
1435#ifdef MAC
1436			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
1437#endif
1438				catchpacket(d, (u_char *)&mb, pktlen, slen,
1439				    bpf_mcopy, &tv);
1440		}
1441		BPFD_UNLOCK(d);
1442	}
1443	BPFIF_UNLOCK(bp);
1444}
1445
1446#undef	BPF_CHECK_DIRECTION
1447
1448/*
1449 * Move the packet data from interface memory (pkt) into the
1450 * store buffer.  "cpfn" is the routine called to do the actual data
1451 * transfer.  bcopy is passed in to copy contiguous chunks, while
1452 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
1453 * pkt is really an mbuf.
1454 */
1455static void
1456catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
1457    void (*cpfn)(const void *, void *, size_t), struct timeval *tv)
1458{
1459	struct bpf_hdr *hp;
1460	int totlen, curlen;
1461	int hdrlen = d->bd_bif->bif_hdrlen;
1462	int do_wakeup = 0;
1463
1464	BPFD_LOCK_ASSERT(d);
1465	/*
1466	 * Figure out how many bytes to move.  If the packet is
1467	 * greater or equal to the snapshot length, transfer that
1468	 * much.  Otherwise, transfer the whole packet (unless
1469	 * we hit the buffer size limit).
1470	 */
1471	totlen = hdrlen + min(snaplen, pktlen);
1472	if (totlen > d->bd_bufsize)
1473		totlen = d->bd_bufsize;
1474
1475	/*
1476	 * Round up the end of the previous packet to the next longword.
1477	 */
1478	curlen = BPF_WORDALIGN(d->bd_slen);
1479	if (curlen + totlen > d->bd_bufsize) {
1480		/*
1481		 * This packet will overflow the storage buffer.
1482		 * Rotate the buffers if we can, then wakeup any
1483		 * pending reads.
1484		 */
1485		if (d->bd_fbuf == NULL) {
1486			/*
1487			 * We haven't completed the previous read yet,
1488			 * so drop the packet.
1489			 */
1490			++d->bd_dcount;
1491			return;
1492		}
1493		ROTATE_BUFFERS(d);
1494		do_wakeup = 1;
1495		curlen = 0;
1496	}
1497	else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
1498		/*
1499		 * Immediate mode is set, or the read timeout has
1500		 * already expired during a select call.  A packet
1501		 * arrived, so the reader should be woken up.
1502		 */
1503		do_wakeup = 1;
1504
1505	/*
1506	 * Append the bpf header.
1507	 */
1508	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
1509	hp->bh_tstamp = *tv;
1510	hp->bh_datalen = pktlen;
1511	hp->bh_hdrlen = hdrlen;
1512	/*
1513	 * Copy the packet data into the store buffer and update its length.
1514	 */
1515	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
1516	d->bd_slen = curlen + totlen;
1517
1518	if (do_wakeup)
1519		bpf_wakeup(d);
1520}
1521
1522/*
1523 * Initialize all nonzero fields of a descriptor.
1524 */
1525static void
1526bpf_allocbufs(struct bpf_d *d)
1527{
1528
1529	KASSERT(d->bd_fbuf == NULL, ("bpf_allocbufs: bd_fbuf != NULL"));
1530	KASSERT(d->bd_sbuf == NULL, ("bpf_allocbufs: bd_sbuf != NULL"));
1531	KASSERT(d->bd_hbuf == NULL, ("bpf_allocbufs: bd_hbuf != NULL"));
1532
1533	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1534	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1535	d->bd_slen = 0;
1536	d->bd_hlen = 0;
1537}
1538
1539/*
1540 * Free buffers currently in use by a descriptor.
1541 * Called on close.
1542 */
1543static void
1544bpf_freed(struct bpf_d *d)
1545{
1546	/*
1547	 * We don't need to lock out interrupts since this descriptor has
1548	 * been detached from its interface and it yet hasn't been marked
1549	 * free.
1550	 */
1551	if (d->bd_sbuf != NULL) {
1552		free(d->bd_sbuf, M_BPF);
1553		if (d->bd_hbuf != NULL)
1554			free(d->bd_hbuf, M_BPF);
1555		if (d->bd_fbuf != NULL)
1556			free(d->bd_fbuf, M_BPF);
1557	}
1558	if (d->bd_rfilter) {
1559		free((caddr_t)d->bd_rfilter, M_BPF);
1560#ifdef BPF_JITTER
1561		bpf_destroy_jit_filter(d->bd_bfilter);
1562#endif
1563	}
1564	if (d->bd_wfilter)
1565		free((caddr_t)d->bd_wfilter, M_BPF);
1566	mtx_destroy(&d->bd_mtx);
1567}
1568
1569/*
1570 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
1571 * fixed size of the link header (variable length headers not yet supported).
1572 */
1573void
1574bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
1575{
1576
1577	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
1578}
1579
1580/*
1581 * Attach an interface to bpf.  ifp is a pointer to the structure
1582 * defining the interface to be attached, dlt is the link layer type,
1583 * and hdrlen is the fixed size of the link header (variable length
1584 * headers are not yet supporrted).
1585 */
1586void
1587bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
1588{
1589	struct bpf_if *bp;
1590
1591	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
1592	if (bp == NULL)
1593		panic("bpfattach");
1594
1595	LIST_INIT(&bp->bif_dlist);
1596	bp->bif_ifp = ifp;
1597	bp->bif_dlt = dlt;
1598	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
1599	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
1600	*driverp = bp;
1601
1602	mtx_lock(&bpf_mtx);
1603	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
1604	mtx_unlock(&bpf_mtx);
1605
1606	/*
1607	 * Compute the length of the bpf header.  This is not necessarily
1608	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1609	 * that the network layer header begins on a longword boundary (for
1610	 * performance reasons and to alleviate alignment restrictions).
1611	 */
1612	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1613
1614	if (bootverbose)
1615		if_printf(ifp, "bpf attached\n");
1616}
1617
1618/*
1619 * Detach bpf from an interface.  This involves detaching each descriptor
1620 * associated with the interface, and leaving bd_bif NULL.  Notify each
1621 * descriptor as it's detached so that any sleepers wake up and get
1622 * ENXIO.
1623 */
1624void
1625bpfdetach(struct ifnet *ifp)
1626{
1627	struct bpf_if	*bp;
1628	struct bpf_d	*d;
1629
1630	/* Locate BPF interface information */
1631	mtx_lock(&bpf_mtx);
1632	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1633		if (ifp == bp->bif_ifp)
1634			break;
1635	}
1636
1637	/* Interface wasn't attached */
1638	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
1639		mtx_unlock(&bpf_mtx);
1640		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
1641		return;
1642	}
1643
1644	LIST_REMOVE(bp, bif_next);
1645	mtx_unlock(&bpf_mtx);
1646
1647	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
1648		bpf_detachd(d);
1649		BPFD_LOCK(d);
1650		bpf_wakeup(d);
1651		BPFD_UNLOCK(d);
1652	}
1653
1654	mtx_destroy(&bp->bif_mtx);
1655	free(bp, M_BPF);
1656}
1657
1658/*
1659 * Get a list of available data link type of the interface.
1660 */
1661static int
1662bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
1663{
1664	int n, error;
1665	struct ifnet *ifp;
1666	struct bpf_if *bp;
1667
1668	ifp = d->bd_bif->bif_ifp;
1669	n = 0;
1670	error = 0;
1671	mtx_lock(&bpf_mtx);
1672	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1673		if (bp->bif_ifp != ifp)
1674			continue;
1675		if (bfl->bfl_list != NULL) {
1676			if (n >= bfl->bfl_len) {
1677				mtx_unlock(&bpf_mtx);
1678				return (ENOMEM);
1679			}
1680			error = copyout(&bp->bif_dlt,
1681			    bfl->bfl_list + n, sizeof(u_int));
1682		}
1683		n++;
1684	}
1685	mtx_unlock(&bpf_mtx);
1686	bfl->bfl_len = n;
1687	return (error);
1688}
1689
1690/*
1691 * Set the data link type of a BPF instance.
1692 */
1693static int
1694bpf_setdlt(struct bpf_d *d, u_int dlt)
1695{
1696	int error, opromisc;
1697	struct ifnet *ifp;
1698	struct bpf_if *bp;
1699
1700	if (d->bd_bif->bif_dlt == dlt)
1701		return (0);
1702	ifp = d->bd_bif->bif_ifp;
1703	mtx_lock(&bpf_mtx);
1704	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1705		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
1706			break;
1707	}
1708	mtx_unlock(&bpf_mtx);
1709	if (bp != NULL) {
1710		opromisc = d->bd_promisc;
1711		bpf_detachd(d);
1712		bpf_attachd(d, bp);
1713		BPFD_LOCK(d);
1714		reset_d(d);
1715		BPFD_UNLOCK(d);
1716		if (opromisc) {
1717			error = ifpromisc(bp->bif_ifp, 1);
1718			if (error)
1719				if_printf(bp->bif_ifp,
1720					"bpf_setdlt: ifpromisc failed (%d)\n",
1721					error);
1722			else
1723				d->bd_promisc = 1;
1724		}
1725	}
1726	return (bp == NULL ? EINVAL : 0);
1727}
1728
1729static void
1730bpf_clone(void *arg, struct ucred *cred, char *name, int namelen,
1731    struct cdev **dev)
1732{
1733	int u;
1734
1735	if (*dev != NULL)
1736		return;
1737	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
1738		return;
1739	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
1740	    "bpf%d", u);
1741	dev_ref(*dev);
1742	(*dev)->si_flags |= SI_CHEAPCLONE;
1743	return;
1744}
1745
1746static void
1747bpf_drvinit(void *unused)
1748{
1749
1750	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
1751	LIST_INIT(&bpf_iflist);
1752	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
1753}
1754
1755static void
1756bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
1757{
1758
1759	bzero(d, sizeof(*d));
1760	BPFD_LOCK_ASSERT(bd);
1761	d->bd_immediate = bd->bd_immediate;
1762	d->bd_promisc = bd->bd_promisc;
1763	d->bd_hdrcmplt = bd->bd_hdrcmplt;
1764	d->bd_direction = bd->bd_direction;
1765	d->bd_feedback = bd->bd_feedback;
1766	d->bd_async = bd->bd_async;
1767	d->bd_rcount = bd->bd_rcount;
1768	d->bd_dcount = bd->bd_dcount;
1769	d->bd_fcount = bd->bd_fcount;
1770	d->bd_sig = bd->bd_sig;
1771	d->bd_slen = bd->bd_slen;
1772	d->bd_hlen = bd->bd_hlen;
1773	d->bd_bufsize = bd->bd_bufsize;
1774	d->bd_pid = bd->bd_pid;
1775	strlcpy(d->bd_ifname,
1776	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
1777	d->bd_locked = bd->bd_locked;
1778}
1779
1780static int
1781bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
1782{
1783	struct xbpf_d *xbdbuf, *xbd;
1784	int index, error;
1785	struct bpf_if *bp;
1786	struct bpf_d *bd;
1787
1788	/*
1789	 * XXX This is not technically correct. It is possible for non
1790	 * privileged users to open bpf devices. It would make sense
1791	 * if the users who opened the devices were able to retrieve
1792	 * the statistics for them, too.
1793	 */
1794	error = priv_check(req->td, PRIV_NET_BPF);
1795	if (error)
1796		return (error);
1797	if (req->oldptr == NULL)
1798		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
1799	if (bpf_bpfd_cnt == 0)
1800		return (SYSCTL_OUT(req, 0, 0));
1801	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
1802	mtx_lock(&bpf_mtx);
1803	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
1804		mtx_unlock(&bpf_mtx);
1805		free(xbdbuf, M_BPF);
1806		return (ENOMEM);
1807	}
1808	index = 0;
1809	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1810		BPFIF_LOCK(bp);
1811		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
1812			xbd = &xbdbuf[index++];
1813			BPFD_LOCK(bd);
1814			bpfstats_fill_xbpf(xbd, bd);
1815			BPFD_UNLOCK(bd);
1816		}
1817		BPFIF_UNLOCK(bp);
1818	}
1819	mtx_unlock(&bpf_mtx);
1820	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
1821	free(xbdbuf, M_BPF);
1822	return (error);
1823}
1824
1825SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL)
1826
1827#else /* !DEV_BPF && !NETGRAPH_BPF */
1828/*
1829 * NOP stubs to allow bpf-using drivers to load and function.
1830 *
1831 * A 'better' implementation would allow the core bpf functionality
1832 * to be loaded at runtime.
1833 */
1834static struct bpf_if bp_null;
1835
1836void
1837bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
1838{
1839}
1840
/* Stub: bpf is not configured, so the mbuf chain is ignored. */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{

	/* Nothing to do. */
}
1845
1846void
1847bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
1848{
1849}
1850
1851void
1852bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
1853{
1854
1855	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
1856}
1857
1858void
1859bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
1860{
1861
1862	*driverp = &bp_null;
1863}
1864
/* Stub: nothing was allocated by the stub attach, so nothing to undo. */
void
bpfdetach(struct ifnet *ifp)
{

	/* Nothing to do. */
}
1869
1870u_int
1871bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
1872{
1873	return -1;	/* "no filter" behaviour */
1874}
1875
/*
 * Stub: without bpf support every program is reported as invalid;
 * the arguments are not examined.
 */
int
bpf_validate(const struct bpf_insn *f, int len)
{
	const int valid = 0;	/* false */

	return (valid);
}
1881
1882#endif /* !DEV_BPF && !NETGRAPH_BPF */
1883