bpf.c revision 172108
1/*-
2 * Copyright (c) 1990, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from the Stanford/CMU enet packet filter,
6 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
7 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
8 * Berkeley Laboratory.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
35 *
36 * $FreeBSD: head/sys/net/bpf.c 172108 2007-09-10 00:03:06Z thompsa $
37 */
38
39#include "opt_bpf.h"
40#include "opt_mac.h"
41#include "opt_netgraph.h"
42
43#include <sys/types.h>
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/conf.h>
47#include <sys/fcntl.h>
48#include <sys/malloc.h>
49#include <sys/mbuf.h>
50#include <sys/time.h>
51#include <sys/priv.h>
52#include <sys/proc.h>
53#include <sys/signalvar.h>
54#include <sys/filio.h>
55#include <sys/sockio.h>
56#include <sys/ttycom.h>
57#include <sys/uio.h>
58
59#include <sys/event.h>
60#include <sys/file.h>
61#include <sys/poll.h>
62#include <sys/proc.h>
63
64#include <sys/socket.h>
65
66#include <net/if.h>
67#include <net/bpf.h>
68#ifdef BPF_JITTER
69#include <net/bpf_jitter.h>
70#endif
71#include <net/bpfdesc.h>
72
73#include <netinet/in.h>
74#include <netinet/if_ether.h>
75#include <sys/kernel.h>
76#include <sys/sysctl.h>
77
78#include <net80211/ieee80211_freebsd.h>
79
80#include <security/mac/mac_framework.h>
81
82static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
83
84#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
85
86#define PRINET  26			/* interruptible */
87
88#define	M_SKIP_BPF	M_SKIP_FIREWALL
89
90/*
91 * bpf_iflist is a list of BPF interface structures, each corresponding to a
92 * specific DLT.  The same network interface might have several BPF interface
93 * structures registered by different layers in the stack (i.e., 802.11
94 * frames, ethernet frames, etc).
95 */
96static LIST_HEAD(, bpf_if)	bpf_iflist;
97static struct mtx	bpf_mtx;		/* bpf global lock */
98static int		bpf_bpfd_cnt;
99
100static void	bpf_allocbufs(struct bpf_d *);
101static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
102static void	bpf_detachd(struct bpf_d *);
103static void	bpf_freed(struct bpf_d *);
104static void	bpf_mcopy(const void *, void *, size_t);
105static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
106		    struct sockaddr *, int *, struct bpf_insn *);
107static int	bpf_setif(struct bpf_d *, struct ifreq *);
108static void	bpf_timed_out(void *);
109static __inline void
110		bpf_wakeup(struct bpf_d *);
111static void	catchpacket(struct bpf_d *, u_char *, u_int,
112		    u_int, void (*)(const void *, void *, size_t),
113		    struct timeval *);
114static void	reset_d(struct bpf_d *);
115static int	 bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
116static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
117static int	bpf_setdlt(struct bpf_d *, u_int);
118static void	filt_bpfdetach(struct knote *);
119static int	filt_bpfread(struct knote *, long);
120static void	bpf_drvinit(void *);
121static void	bpf_clone(void *, struct ucred *, char *, int, struct cdev **);
122static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
123
124SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
125static int bpf_bufsize = 4096;
126SYSCTL_INT(_net_bpf, OID_AUTO, bufsize, CTLFLAG_RW,
127    &bpf_bufsize, 0, "Default bpf buffer size");
128static int bpf_maxbufsize = BPF_MAXBUFSIZE;
129SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
130    &bpf_maxbufsize, 0, "Maximum bpf buffer size");
131static int bpf_maxinsns = BPF_MAXINSNS;
132SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
133    &bpf_maxinsns, 0, "Maximum bpf program instructions");
134SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW,
135    bpf_stats_sysctl, "bpf statistics portal");
136
137static	d_open_t	bpfopen;
138static	d_close_t	bpfclose;
139static	d_read_t	bpfread;
140static	d_write_t	bpfwrite;
141static	d_ioctl_t	bpfioctl;
142static	d_poll_t	bpfpoll;
143static	d_kqfilter_t	bpfkqfilter;
144
145static struct cdevsw bpf_cdevsw = {
146	.d_version =	D_VERSION,
147	.d_open =	bpfopen,
148	.d_close =	bpfclose,
149	.d_read =	bpfread,
150	.d_write =	bpfwrite,
151	.d_ioctl =	bpfioctl,
152	.d_poll =	bpfpoll,
153	.d_name =	"bpf",
154	.d_kqfilter =	bpfkqfilter,
155};
156
157static struct filterops bpfread_filtops =
158	{ 1, NULL, filt_bpfdetach, filt_bpfread };
159
160static int
161bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
162    struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
163{
164	const struct ieee80211_bpf_params *p;
165	struct ether_header *eh;
166	struct mbuf *m;
167	int error;
168	int len;
169	int hlen;
170	int slen;
171
172	/*
173	 * Build a sockaddr based on the data link layer type.
174	 * We do this at this level because the ethernet header
175	 * is copied directly into the data field of the sockaddr.
176	 * In the case of SLIP, there is no header and the packet
177	 * is forwarded as is.
178	 * Also, we are careful to leave room at the front of the mbuf
179	 * for the link level header.
180	 */
181	switch (linktype) {
182
183	case DLT_SLIP:
184		sockp->sa_family = AF_INET;
185		hlen = 0;
186		break;
187
188	case DLT_EN10MB:
189		sockp->sa_family = AF_UNSPEC;
190		/* XXX Would MAXLINKHDR be better? */
191		hlen = ETHER_HDR_LEN;
192		break;
193
194	case DLT_FDDI:
195		sockp->sa_family = AF_IMPLINK;
196		hlen = 0;
197		break;
198
199	case DLT_RAW:
200		sockp->sa_family = AF_UNSPEC;
201		hlen = 0;
202		break;
203
204	case DLT_NULL:
205		/*
206		 * null interface types require a 4 byte pseudo header which
207		 * corresponds to the address family of the packet.
208		 */
209		sockp->sa_family = AF_UNSPEC;
210		hlen = 4;
211		break;
212
213	case DLT_ATM_RFC1483:
214		/*
215		 * en atm driver requires 4-byte atm pseudo header.
216		 * though it isn't standard, vpi:vci needs to be
217		 * specified anyway.
218		 */
219		sockp->sa_family = AF_UNSPEC;
220		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
221		break;
222
223	case DLT_PPP:
224		sockp->sa_family = AF_UNSPEC;
225		hlen = 4;	/* This should match PPP_HDRLEN */
226		break;
227
228	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
229		sockp->sa_family = AF_IEEE80211;
230		hlen = 0;
231		break;
232
233	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
234		sockp->sa_family = AF_IEEE80211;
235		sockp->sa_len = 12;	/* XXX != 0 */
236		hlen = sizeof(struct ieee80211_bpf_params);
237		break;
238
239	default:
240		return (EIO);
241	}
242
243	len = uio->uio_resid;
244
245	if (len - hlen > ifp->if_mtu)
246		return (EMSGSIZE);
247
248	if ((unsigned)len > MCLBYTES)
249		return (EIO);
250
251	if (len > MHLEN) {
252		m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR);
253	} else {
254		MGETHDR(m, M_TRYWAIT, MT_DATA);
255	}
256	if (m == NULL)
257		return (ENOBUFS);
258	m->m_pkthdr.len = m->m_len = len;
259	m->m_pkthdr.rcvif = NULL;
260	*mp = m;
261
262	if (m->m_len < hlen) {
263		error = EPERM;
264		goto bad;
265	}
266
267	error = uiomove(mtod(m, u_char *), len, uio);
268	if (error)
269		goto bad;
270
271	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
272	if (slen == 0) {
273		error = EPERM;
274		goto bad;
275	}
276
277	/* Check for multicast destination */
278	switch (linktype) {
279	case DLT_EN10MB:
280		eh = mtod(m, struct ether_header *);
281		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
282			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
283			    ETHER_ADDR_LEN) == 0)
284				m->m_flags |= M_BCAST;
285			else
286				m->m_flags |= M_MCAST;
287		}
288		break;
289	}
290
291	/*
292	 * Make room for link header, and copy it to sockaddr
293	 */
294	if (hlen != 0) {
295		if (sockp->sa_family == AF_IEEE80211) {
296			/*
297			 * Collect true length from the parameter header
298			 * NB: sockp is known to be zero'd so if we do a
299			 *     short copy unspecified parameters will be
300			 *     zero.
301			 * NB: packet may not be aligned after stripping
302			 *     bpf params
303			 * XXX check ibp_vers
304			 */
305			p = mtod(m, const struct ieee80211_bpf_params *);
306			hlen = p->ibp_len;
307			if (hlen > sizeof(sockp->sa_data)) {
308				error = EINVAL;
309				goto bad;
310			}
311		}
312		bcopy(m->m_data, sockp->sa_data, hlen);
313	}
314	*hdrlen = hlen;
315
316	return (0);
317bad:
318	m_freem(m);
319	return (error);
320}
321
322/*
323 * Attach file to the bpf interface, i.e. make d listen on bp.
324 */
325static void
326bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
327{
328	/*
329	 * Point d at bp, and add d to the interface's list of listeners.
330	 * Finally, point the driver's bpf cookie at the interface so
331	 * it will divert packets to bpf.
332	 */
333	BPFIF_LOCK(bp);
334	d->bd_bif = bp;
335	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
336
337	bpf_bpfd_cnt++;
338	BPFIF_UNLOCK(bp);
339}
340
341/*
342 * Detach a file from its interface.
343 */
344static void
345bpf_detachd(struct bpf_d *d)
346{
347	int error;
348	struct bpf_if *bp;
349	struct ifnet *ifp;
350
351	bp = d->bd_bif;
352	BPFIF_LOCK(bp);
353	BPFD_LOCK(d);
354	ifp = d->bd_bif->bif_ifp;
355
356	/*
357	 * Remove d from the interface's descriptor list.
358	 */
359	LIST_REMOVE(d, bd_next);
360
361	bpf_bpfd_cnt--;
362	d->bd_bif = NULL;
363	BPFD_UNLOCK(d);
364	BPFIF_UNLOCK(bp);
365
366	/*
367	 * Check if this descriptor had requested promiscuous mode.
368	 * If so, turn it off.
369	 */
370	if (d->bd_promisc) {
371		d->bd_promisc = 0;
372		error = ifpromisc(ifp, 0);
373		if (error != 0 && error != ENXIO) {
374			/*
375			 * ENXIO can happen if a pccard is unplugged
376			 * Something is really wrong if we were able to put
377			 * the driver into promiscuous mode, but can't
378			 * take it out.
379			 */
380			if_printf(bp->bif_ifp,
381				"bpf_detach: ifpromisc failed (%d)\n", error);
382		}
383	}
384}
385
386/*
387 * Open ethernet device.  Returns ENXIO for illegal minor device number,
388 * EBUSY if file is open by another process.
389 */
390/* ARGSUSED */
391static	int
392bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
393{
394	struct bpf_d *d;
395
396	mtx_lock(&bpf_mtx);
397	d = dev->si_drv1;
398	/*
399	 * Each minor can be opened by only one process.  If the requested
400	 * minor is in use, return EBUSY.
401	 */
402	if (d != NULL) {
403		mtx_unlock(&bpf_mtx);
404		return (EBUSY);
405	}
406	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
407	mtx_unlock(&bpf_mtx);
408
409	if ((dev->si_flags & SI_NAMED) == 0)
410		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
411		    "bpf%d", dev2unit(dev));
412	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
413	dev->si_drv1 = d;
414	d->bd_bufsize = bpf_bufsize;
415	d->bd_sig = SIGIO;
416	d->bd_direction = BPF_D_INOUT;
417	d->bd_pid = td->td_proc->p_pid;
418#ifdef MAC
419	mac_init_bpfdesc(d);
420	mac_create_bpfdesc(td->td_ucred, d);
421#endif
422	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
423	callout_init(&d->bd_callout, CALLOUT_MPSAFE);
424	knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);
425
426	return (0);
427}
428
429/*
430 * Close the descriptor by detaching it from its interface,
431 * deallocating its buffers, and marking it free.
432 */
433/* ARGSUSED */
434static	int
435bpfclose(struct cdev *dev, int flags, int fmt, struct thread *td)
436{
437	struct bpf_d *d = dev->si_drv1;
438
439	BPFD_LOCK(d);
440	if (d->bd_state == BPF_WAITING)
441		callout_stop(&d->bd_callout);
442	d->bd_state = BPF_IDLE;
443	BPFD_UNLOCK(d);
444	funsetown(&d->bd_sigio);
445	mtx_lock(&bpf_mtx);
446	if (d->bd_bif)
447		bpf_detachd(d);
448	mtx_unlock(&bpf_mtx);
449	selwakeuppri(&d->bd_sel, PRINET);
450#ifdef MAC
451	mac_destroy_bpfdesc(d);
452#endif /* MAC */
453	knlist_destroy(&d->bd_sel.si_note);
454	bpf_freed(d);
455	dev->si_drv1 = NULL;
456	free(d, M_BPF);
457
458	return (0);
459}
460
461
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a
 * single statement and is safe as the body of an unbraced if/else.
 * Invoke it with a trailing semicolon: ROTATE_BUFFERS(d);
 * Note that d is evaluated several times.
 */
#define ROTATE_BUFFERS(d) do { \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL; \
} while (0)
473/*
474 *  bpfread - read next chunk of packets from buffers
475 */
476static	int
477bpfread(struct cdev *dev, struct uio *uio, int ioflag)
478{
479	struct bpf_d *d = dev->si_drv1;
480	int timed_out;
481	int error;
482
483	/*
484	 * Restrict application to use a buffer the same size as
485	 * as kernel buffers.
486	 */
487	if (uio->uio_resid != d->bd_bufsize)
488		return (EINVAL);
489
490	BPFD_LOCK(d);
491	if (d->bd_state == BPF_WAITING)
492		callout_stop(&d->bd_callout);
493	timed_out = (d->bd_state == BPF_TIMED_OUT);
494	d->bd_state = BPF_IDLE;
495	/*
496	 * If the hold buffer is empty, then do a timed sleep, which
497	 * ends when the timeout expires or when enough packets
498	 * have arrived to fill the store buffer.
499	 */
500	while (d->bd_hbuf == NULL) {
501		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
502			/*
503			 * A packet(s) either arrived since the previous
504			 * read or arrived while we were asleep.
505			 * Rotate the buffers and return what's here.
506			 */
507			ROTATE_BUFFERS(d);
508			break;
509		}
510
511		/*
512		 * No data is available, check to see if the bpf device
513		 * is still pointed at a real interface.  If not, return
514		 * ENXIO so that the userland process knows to rebind
515		 * it before using it again.
516		 */
517		if (d->bd_bif == NULL) {
518			BPFD_UNLOCK(d);
519			return (ENXIO);
520		}
521
522		if (ioflag & O_NONBLOCK) {
523			BPFD_UNLOCK(d);
524			return (EWOULDBLOCK);
525		}
526		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
527		     "bpf", d->bd_rtout);
528		if (error == EINTR || error == ERESTART) {
529			BPFD_UNLOCK(d);
530			return (error);
531		}
532		if (error == EWOULDBLOCK) {
533			/*
534			 * On a timeout, return what's in the buffer,
535			 * which may be nothing.  If there is something
536			 * in the store buffer, we can rotate the buffers.
537			 */
538			if (d->bd_hbuf)
539				/*
540				 * We filled up the buffer in between
541				 * getting the timeout and arriving
542				 * here, so we don't need to rotate.
543				 */
544				break;
545
546			if (d->bd_slen == 0) {
547				BPFD_UNLOCK(d);
548				return (0);
549			}
550			ROTATE_BUFFERS(d);
551			break;
552		}
553	}
554	/*
555	 * At this point, we know we have something in the hold slot.
556	 */
557	BPFD_UNLOCK(d);
558
559	/*
560	 * Move data from hold buffer into user space.
561	 * We know the entire buffer is transferred since
562	 * we checked above that the read buffer is bpf_bufsize bytes.
563	 */
564	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);
565
566	BPFD_LOCK(d);
567	d->bd_fbuf = d->bd_hbuf;
568	d->bd_hbuf = NULL;
569	d->bd_hlen = 0;
570	BPFD_UNLOCK(d);
571
572	return (error);
573}
574
575
576/*
577 * If there are processes sleeping on this descriptor, wake them up.
578 */
579static __inline void
580bpf_wakeup(struct bpf_d *d)
581{
582
583	BPFD_LOCK_ASSERT(d);
584	if (d->bd_state == BPF_WAITING) {
585		callout_stop(&d->bd_callout);
586		d->bd_state = BPF_IDLE;
587	}
588	wakeup(d);
589	if (d->bd_async && d->bd_sig && d->bd_sigio)
590		pgsigio(&d->bd_sigio, d->bd_sig, 0);
591
592	selwakeuppri(&d->bd_sel, PRINET);
593	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
594}
595
596static void
597bpf_timed_out(void *arg)
598{
599	struct bpf_d *d = (struct bpf_d *)arg;
600
601	BPFD_LOCK(d);
602	if (d->bd_state == BPF_WAITING) {
603		d->bd_state = BPF_TIMED_OUT;
604		if (d->bd_slen != 0)
605			bpf_wakeup(d);
606	}
607	BPFD_UNLOCK(d);
608}
609
610static int
611bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
612{
613	struct bpf_d *d = dev->si_drv1;
614	struct ifnet *ifp;
615	struct mbuf *m, *mc;
616	struct sockaddr dst;
617	int error, hlen;
618
619	if (d->bd_bif == NULL)
620		return (ENXIO);
621
622	ifp = d->bd_bif->bif_ifp;
623
624	if ((ifp->if_flags & IFF_UP) == 0)
625		return (ENETDOWN);
626
627	if (uio->uio_resid == 0)
628		return (0);
629
630	bzero(&dst, sizeof(dst));
631	m = NULL;
632	hlen = 0;
633	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
634	    &m, &dst, &hlen, d->bd_wfilter);
635	if (error)
636		return (error);
637
638	if (d->bd_hdrcmplt)
639		dst.sa_family = pseudo_AF_HDRCMPLT;
640
641	if (d->bd_feedback) {
642		mc = m_dup(m, M_DONTWAIT);
643		if (mc != NULL)
644			mc->m_pkthdr.rcvif = ifp;
645		/* XXX Do not return the same packet twice. */
646		if (d->bd_direction == BPF_D_INOUT)
647			m->m_flags |= M_SKIP_BPF;
648	} else
649		mc = NULL;
650
651	m->m_pkthdr.len -= hlen;
652	m->m_len -= hlen;
653	m->m_data += hlen;	/* XXX */
654
655#ifdef MAC
656	BPFD_LOCK(d);
657	mac_create_mbuf_from_bpfdesc(d, m);
658	if (mc != NULL)
659		mac_create_mbuf_from_bpfdesc(d, mc);
660	BPFD_UNLOCK(d);
661#endif
662
663	error = (*ifp->if_output)(ifp, m, &dst, NULL);
664
665	if (mc != NULL) {
666		if (error == 0)
667			(*ifp->if_input)(ifp, mc);
668		else
669			m_freem(mc);
670	}
671
672	return (error);
673}
674
675/*
676 * Reset a descriptor by flushing its packet buffer and clearing the
677 * receive and drop counts.
678 */
679static void
680reset_d(struct bpf_d *d)
681{
682
683	mtx_assert(&d->bd_mtx, MA_OWNED);
684	if (d->bd_hbuf) {
685		/* Free the hold buffer. */
686		d->bd_fbuf = d->bd_hbuf;
687		d->bd_hbuf = NULL;
688	}
689	d->bd_slen = 0;
690	d->bd_hlen = 0;
691	d->bd_rcount = 0;
692	d->bd_dcount = 0;
693	d->bd_fcount = 0;
694}
695
696/*
697 *  FIONREAD		Check for read packet available.
698 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
699 *  BIOCGBLEN		Get buffer len [for read()].
700 *  BIOCSETF		Set ethernet read filter.
701 *  BIOCSETWF		Set ethernet write filter.
702 *  BIOCFLUSH		Flush read packet buffer.
703 *  BIOCPROMISC		Put interface into promiscuous mode.
704 *  BIOCGDLT		Get link layer type.
705 *  BIOCGETIF		Get interface name.
706 *  BIOCSETIF		Set interface.
707 *  BIOCSRTIMEOUT	Set read timeout.
708 *  BIOCGRTIMEOUT	Get read timeout.
709 *  BIOCGSTATS		Get packet stats.
710 *  BIOCIMMEDIATE	Set immediate mode.
711 *  BIOCVERSION		Get filter language version.
712 *  BIOCGHDRCMPLT	Get "header already complete" flag
713 *  BIOCSHDRCMPLT	Set "header already complete" flag
714 *  BIOCGDIRECTION	Get packet direction flag
715 *  BIOCSDIRECTION	Set packet direction flag
716 *  BIOCLOCK		Set "locked" flag
717 *  BIOCFEEDBACK	Set packet feedback mode.
718 */
719/* ARGSUSED */
720static	int
721bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
722    struct thread *td)
723{
724	struct bpf_d *d = dev->si_drv1;
725	int error = 0;
726
727	/*
728	 * Refresh PID associated with this descriptor.
729	 */
730	BPFD_LOCK(d);
731	d->bd_pid = td->td_proc->p_pid;
732	if (d->bd_state == BPF_WAITING)
733		callout_stop(&d->bd_callout);
734	d->bd_state = BPF_IDLE;
735	BPFD_UNLOCK(d);
736
737	if (d->bd_locked == 1) {
738		switch (cmd) {
739		case BIOCGBLEN:
740		case BIOCFLUSH:
741		case BIOCGDLT:
742		case BIOCGDLTLIST:
743		case BIOCGETIF:
744		case BIOCGRTIMEOUT:
745		case BIOCGSTATS:
746		case BIOCVERSION:
747		case BIOCGRSIG:
748		case BIOCGHDRCMPLT:
749		case BIOCFEEDBACK:
750		case FIONREAD:
751		case BIOCLOCK:
752		case BIOCSRTIMEOUT:
753		case BIOCIMMEDIATE:
754		case TIOCGPGRP:
755			break;
756		default:
757			return (EPERM);
758		}
759	}
760	switch (cmd) {
761
762	default:
763		error = EINVAL;
764		break;
765
766	/*
767	 * Check for read packet available.
768	 */
769	case FIONREAD:
770		{
771			int n;
772
773			BPFD_LOCK(d);
774			n = d->bd_slen;
775			if (d->bd_hbuf)
776				n += d->bd_hlen;
777			BPFD_UNLOCK(d);
778
779			*(int *)addr = n;
780			break;
781		}
782
783	case SIOCGIFADDR:
784		{
785			struct ifnet *ifp;
786
787			if (d->bd_bif == NULL)
788				error = EINVAL;
789			else {
790				ifp = d->bd_bif->bif_ifp;
791				error = (*ifp->if_ioctl)(ifp, cmd, addr);
792			}
793			break;
794		}
795
796	/*
797	 * Get buffer len [for read()].
798	 */
799	case BIOCGBLEN:
800		*(u_int *)addr = d->bd_bufsize;
801		break;
802
803	/*
804	 * Set buffer length.
805	 */
806	case BIOCSBLEN:
807		if (d->bd_bif != NULL)
808			error = EINVAL;
809		else {
810			u_int size = *(u_int *)addr;
811
812			if (size > bpf_maxbufsize)
813				*(u_int *)addr = size = bpf_maxbufsize;
814			else if (size < BPF_MINBUFSIZE)
815				*(u_int *)addr = size = BPF_MINBUFSIZE;
816			d->bd_bufsize = size;
817		}
818		break;
819
820	/*
821	 * Set link layer read filter.
822	 */
823	case BIOCSETF:
824	case BIOCSETWF:
825		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
826		break;
827
828	/*
829	 * Flush read packet buffer.
830	 */
831	case BIOCFLUSH:
832		BPFD_LOCK(d);
833		reset_d(d);
834		BPFD_UNLOCK(d);
835		break;
836
837	/*
838	 * Put interface into promiscuous mode.
839	 */
840	case BIOCPROMISC:
841		if (d->bd_bif == NULL) {
842			/*
843			 * No interface attached yet.
844			 */
845			error = EINVAL;
846			break;
847		}
848		if (d->bd_promisc == 0) {
849			error = ifpromisc(d->bd_bif->bif_ifp, 1);
850			if (error == 0)
851				d->bd_promisc = 1;
852		}
853		break;
854
855	/*
856	 * Get current data link type.
857	 */
858	case BIOCGDLT:
859		if (d->bd_bif == NULL)
860			error = EINVAL;
861		else
862			*(u_int *)addr = d->bd_bif->bif_dlt;
863		break;
864
865	/*
866	 * Get a list of supported data link types.
867	 */
868	case BIOCGDLTLIST:
869		if (d->bd_bif == NULL)
870			error = EINVAL;
871		else
872			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
873		break;
874
875	/*
876	 * Set data link type.
877	 */
878	case BIOCSDLT:
879		if (d->bd_bif == NULL)
880			error = EINVAL;
881		else
882			error = bpf_setdlt(d, *(u_int *)addr);
883		break;
884
885	/*
886	 * Get interface name.
887	 */
888	case BIOCGETIF:
889		if (d->bd_bif == NULL)
890			error = EINVAL;
891		else {
892			struct ifnet *const ifp = d->bd_bif->bif_ifp;
893			struct ifreq *const ifr = (struct ifreq *)addr;
894
895			strlcpy(ifr->ifr_name, ifp->if_xname,
896			    sizeof(ifr->ifr_name));
897		}
898		break;
899
900	/*
901	 * Set interface.
902	 */
903	case BIOCSETIF:
904		error = bpf_setif(d, (struct ifreq *)addr);
905		break;
906
907	/*
908	 * Set read timeout.
909	 */
910	case BIOCSRTIMEOUT:
911		{
912			struct timeval *tv = (struct timeval *)addr;
913
914			/*
915			 * Subtract 1 tick from tvtohz() since this isn't
916			 * a one-shot timer.
917			 */
918			if ((error = itimerfix(tv)) == 0)
919				d->bd_rtout = tvtohz(tv) - 1;
920			break;
921		}
922
923	/*
924	 * Get read timeout.
925	 */
926	case BIOCGRTIMEOUT:
927		{
928			struct timeval *tv = (struct timeval *)addr;
929
930			tv->tv_sec = d->bd_rtout / hz;
931			tv->tv_usec = (d->bd_rtout % hz) * tick;
932			break;
933		}
934
935	/*
936	 * Get packet stats.
937	 */
938	case BIOCGSTATS:
939		{
940			struct bpf_stat *bs = (struct bpf_stat *)addr;
941
942			bs->bs_recv = d->bd_rcount;
943			bs->bs_drop = d->bd_dcount;
944			break;
945		}
946
947	/*
948	 * Set immediate mode.
949	 */
950	case BIOCIMMEDIATE:
951		d->bd_immediate = *(u_int *)addr;
952		break;
953
954	case BIOCVERSION:
955		{
956			struct bpf_version *bv = (struct bpf_version *)addr;
957
958			bv->bv_major = BPF_MAJOR_VERSION;
959			bv->bv_minor = BPF_MINOR_VERSION;
960			break;
961		}
962
963	/*
964	 * Get "header already complete" flag
965	 */
966	case BIOCGHDRCMPLT:
967		*(u_int *)addr = d->bd_hdrcmplt;
968		break;
969
970	/*
971	 * Set "header already complete" flag
972	 */
973	case BIOCSHDRCMPLT:
974		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
975		break;
976
977	/*
978	 * Get packet direction flag
979	 */
980	case BIOCGDIRECTION:
981		*(u_int *)addr = d->bd_direction;
982		break;
983
984	/*
985	 * Set packet direction flag
986	 */
987	case BIOCSDIRECTION:
988		{
989			u_int	direction;
990
991			direction = *(u_int *)addr;
992			switch (direction) {
993			case BPF_D_IN:
994			case BPF_D_INOUT:
995			case BPF_D_OUT:
996				d->bd_direction = direction;
997				break;
998			default:
999				error = EINVAL;
1000			}
1001		}
1002		break;
1003
1004	case BIOCFEEDBACK:
1005		d->bd_feedback = *(u_int *)addr;
1006		break;
1007
1008	case BIOCLOCK:
1009		d->bd_locked = 1;
1010		break;
1011
1012	case FIONBIO:		/* Non-blocking I/O */
1013		break;
1014
1015	case FIOASYNC:		/* Send signal on receive packets */
1016		d->bd_async = *(int *)addr;
1017		break;
1018
1019	case FIOSETOWN:
1020		error = fsetown(*(int *)addr, &d->bd_sigio);
1021		break;
1022
1023	case FIOGETOWN:
1024		*(int *)addr = fgetown(&d->bd_sigio);
1025		break;
1026
1027	/* This is deprecated, FIOSETOWN should be used instead. */
1028	case TIOCSPGRP:
1029		error = fsetown(-(*(int *)addr), &d->bd_sigio);
1030		break;
1031
1032	/* This is deprecated, FIOGETOWN should be used instead. */
1033	case TIOCGPGRP:
1034		*(int *)addr = -fgetown(&d->bd_sigio);
1035		break;
1036
1037	case BIOCSRSIG:		/* Set receive signal */
1038		{
1039			u_int sig;
1040
1041			sig = *(u_int *)addr;
1042
1043			if (sig >= NSIG)
1044				error = EINVAL;
1045			else
1046				d->bd_sig = sig;
1047			break;
1048		}
1049	case BIOCGRSIG:
1050		*(u_int *)addr = d->bd_sig;
1051		break;
1052	}
1053	return (error);
1054}
1055
1056/*
1057 * Set d's packet filter program to fp.  If this file already has a filter,
1058 * free it and replace it.  Returns EINVAL for bogus requests.
1059 */
1060static int
1061bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
1062{
1063	struct bpf_insn *fcode, *old;
1064	u_int wfilter, flen, size;
1065#ifdef BPF_JITTER
1066	bpf_jit_filter *ofunc;
1067#endif
1068
1069	if (cmd == BIOCSETWF) {
1070		old = d->bd_wfilter;
1071		wfilter = 1;
1072#ifdef BPF_JITTER
1073		ofunc = NULL;
1074#endif
1075	} else {
1076		wfilter = 0;
1077		old = d->bd_rfilter;
1078#ifdef BPF_JITTER
1079		ofunc = d->bd_bfilter;
1080#endif
1081	}
1082	if (fp->bf_insns == NULL) {
1083		if (fp->bf_len != 0)
1084			return (EINVAL);
1085		BPFD_LOCK(d);
1086		if (wfilter)
1087			d->bd_wfilter = NULL;
1088		else {
1089			d->bd_rfilter = NULL;
1090#ifdef BPF_JITTER
1091			d->bd_bfilter = NULL;
1092#endif
1093		}
1094		reset_d(d);
1095		BPFD_UNLOCK(d);
1096		if (old != NULL)
1097			free((caddr_t)old, M_BPF);
1098#ifdef BPF_JITTER
1099		if (ofunc != NULL)
1100			bpf_destroy_jit_filter(ofunc);
1101#endif
1102		return (0);
1103	}
1104	flen = fp->bf_len;
1105	if (flen > bpf_maxinsns)
1106		return (EINVAL);
1107
1108	size = flen * sizeof(*fp->bf_insns);
1109	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
1110	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
1111	    bpf_validate(fcode, (int)flen)) {
1112		BPFD_LOCK(d);
1113		if (wfilter)
1114			d->bd_wfilter = fcode;
1115		else {
1116			d->bd_rfilter = fcode;
1117#ifdef BPF_JITTER
1118			d->bd_bfilter = bpf_jitter(fcode, flen);
1119#endif
1120		}
1121		reset_d(d);
1122		BPFD_UNLOCK(d);
1123		if (old != NULL)
1124			free((caddr_t)old, M_BPF);
1125#ifdef BPF_JITTER
1126		if (ofunc != NULL)
1127			bpf_destroy_jit_filter(ofunc);
1128#endif
1129
1130		return (0);
1131	}
1132	free((caddr_t)fcode, M_BPF);
1133	return (EINVAL);
1134}
1135
1136/*
1137 * Detach a file from its current interface (if attached at all) and attach
1138 * to the interface indicated by the name stored in ifr.
1139 * Return an errno or 0.
1140 */
1141static int
1142bpf_setif(struct bpf_d *d, struct ifreq *ifr)
1143{
1144	struct bpf_if *bp;
1145	struct ifnet *theywant;
1146
1147	theywant = ifunit(ifr->ifr_name);
1148	if (theywant == NULL || theywant->if_bpf == NULL)
1149		return (ENXIO);
1150
1151	bp = theywant->if_bpf;
1152	/*
1153	 * Allocate the packet buffers if we need to.
1154	 * If we're already attached to requested interface,
1155	 * just flush the buffer.
1156	 */
1157	if (d->bd_sbuf == NULL)
1158		bpf_allocbufs(d);
1159	if (bp != d->bd_bif) {
1160		if (d->bd_bif)
1161			/*
1162			 * Detach if attached to something else.
1163			 */
1164			bpf_detachd(d);
1165
1166		bpf_attachd(d, bp);
1167	}
1168	BPFD_LOCK(d);
1169	reset_d(d);
1170	BPFD_UNLOCK(d);
1171	return (0);
1172}
1173
1174/*
1175 * Support for select() and poll() system calls
1176 *
1177 * Return true iff the specific operation will not block indefinitely.
1178 * Otherwise, return false but make a note that a selwakeup() must be done.
1179 */
1180static int
1181bpfpoll(struct cdev *dev, int events, struct thread *td)
1182{
1183	struct bpf_d *d;
1184	int revents;
1185
1186	d = dev->si_drv1;
1187	if (d->bd_bif == NULL)
1188		return (ENXIO);
1189
1190	/*
1191	 * Refresh PID associated with this descriptor.
1192	 */
1193	revents = events & (POLLOUT | POLLWRNORM);
1194	BPFD_LOCK(d);
1195	d->bd_pid = td->td_proc->p_pid;
1196	if (events & (POLLIN | POLLRDNORM)) {
1197		if (bpf_ready(d))
1198			revents |= events & (POLLIN | POLLRDNORM);
1199		else {
1200			selrecord(td, &d->bd_sel);
1201			/* Start the read timeout if necessary. */
1202			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1203				callout_reset(&d->bd_callout, d->bd_rtout,
1204				    bpf_timed_out, d);
1205				d->bd_state = BPF_WAITING;
1206			}
1207		}
1208	}
1209	BPFD_UNLOCK(d);
1210	return (revents);
1211}
1212
1213/*
1214 * Support for kevent() system call.  Register EVFILT_READ filters and
1215 * reject all others.
1216 */
1217int
1218bpfkqfilter(struct cdev *dev, struct knote *kn)
1219{
1220	struct bpf_d *d = (struct bpf_d *)dev->si_drv1;
1221
1222	if (kn->kn_filter != EVFILT_READ)
1223		return (1);
1224
1225	/*
1226	 * Refresh PID associated with this descriptor.
1227	 */
1228	BPFD_LOCK(d);
1229	d->bd_pid = curthread->td_proc->p_pid;
1230	kn->kn_fop = &bpfread_filtops;
1231	kn->kn_hook = d;
1232	knlist_add(&d->bd_sel.si_note, kn, 1);
1233	BPFD_UNLOCK(d);
1234
1235	return (0);
1236}
1237
1238static void
1239filt_bpfdetach(struct knote *kn)
1240{
1241	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1242
1243	knlist_remove(&d->bd_sel.si_note, kn, 0);
1244}
1245
1246static int
1247filt_bpfread(struct knote *kn, long hint)
1248{
1249	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1250	int ready;
1251
1252	BPFD_LOCK_ASSERT(d);
1253	ready = bpf_ready(d);
1254	if (ready) {
1255		kn->kn_data = d->bd_slen;
1256		if (d->bd_hbuf)
1257			kn->kn_data += d->bd_hlen;
1258	}
1259	else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1260		callout_reset(&d->bd_callout, d->bd_rtout,
1261		    bpf_timed_out, d);
1262		d->bd_state = BPF_WAITING;
1263	}
1264
1265	return (ready);
1266}
1267
1268/*
1269 * Incoming linkage from device drivers.  Process the packet pkt, of length
1270 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
1271 * by each process' filter, and if accepted, stashed into the corresponding
1272 * buffer.
1273 */
1274void
1275bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
1276{
1277	struct bpf_d *d;
1278	u_int slen;
1279	int gottime;
1280	struct timeval tv;
1281
1282	gottime = 0;
1283	BPFIF_LOCK(bp);
1284	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1285		BPFD_LOCK(d);
1286		++d->bd_rcount;
1287#ifdef BPF_JITTER
1288		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL)
1289			slen = (*(d->bd_bfilter->func))(pkt, pktlen, pktlen);
1290		else
1291#endif
1292		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
1293		if (slen != 0) {
1294			d->bd_fcount++;
1295			if (!gottime) {
1296				microtime(&tv);
1297				gottime = 1;
1298			}
1299#ifdef MAC
1300			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
1301#endif
1302				catchpacket(d, pkt, pktlen, slen, bcopy, &tv);
1303		}
1304		BPFD_UNLOCK(d);
1305	}
1306	BPFIF_UNLOCK(bp);
1307}
1308
1309/*
1310 * Copy data from an mbuf chain into a buffer.  This code is derived
1311 * from m_copydata in sys/uipc_mbuf.c.
1312 */
1313static void
1314bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
1315{
1316	const struct mbuf *m;
1317	u_int count;
1318	u_char *dst;
1319
1320	m = src_arg;
1321	dst = dst_arg;
1322	while (len > 0) {
1323		if (m == NULL)
1324			panic("bpf_mcopy");
1325		count = min(m->m_len, len);
1326		bcopy(mtod(m, void *), dst, count);
1327		m = m->m_next;
1328		dst += count;
1329		len -= count;
1330	}
1331}
1332
/*
 * Expands to a bare `if (...)` head that is true when mbuf m travels in
 * the direction descriptor d does NOT want: d wants BPF_D_IN but m has no
 * receive interface, or d wants BPF_D_OUT but m has one.  The statement
 * following the macro invocation is the body of that `if`.
 */
#define	BPF_CHECK_DIRECTION(d, m) \
	if ((d)->bd_direction == \
	    ((m)->m_pkthdr.rcvif == NULL ? BPF_D_IN : BPF_D_OUT))
1336
1337/*
1338 * Incoming linkage from device drivers, when packet is in an mbuf chain.
1339 */
1340void
1341bpf_mtap(struct bpf_if *bp, struct mbuf *m)
1342{
1343	struct bpf_d *d;
1344	u_int pktlen, slen;
1345	int gottime;
1346	struct timeval tv;
1347
1348	if (m->m_flags & M_SKIP_BPF) {
1349		m->m_flags &= ~M_SKIP_BPF;
1350		return;
1351	}
1352
1353	gottime = 0;
1354
1355	pktlen = m_length(m, NULL);
1356
1357	BPFIF_LOCK(bp);
1358	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1359		BPF_CHECK_DIRECTION(d, m)
1360			continue;
1361		BPFD_LOCK(d);
1362		++d->bd_rcount;
1363#ifdef BPF_JITTER
1364		/* XXX We cannot handle multiple mbufs. */
1365		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL &&
1366		    m->m_next == NULL)
1367			slen = (*(d->bd_bfilter->func))(mtod(m, u_char *),
1368			    pktlen, pktlen);
1369		else
1370#endif
1371		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
1372		if (slen != 0) {
1373			d->bd_fcount++;
1374			if (!gottime) {
1375				microtime(&tv);
1376				gottime = 1;
1377			}
1378#ifdef MAC
1379			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
1380#endif
1381				catchpacket(d, (u_char *)m, pktlen, slen,
1382				    bpf_mcopy, &tv);
1383		}
1384		BPFD_UNLOCK(d);
1385	}
1386	BPFIF_UNLOCK(bp);
1387}
1388
1389/*
1390 * Incoming linkage from device drivers, when packet is in
1391 * an mbuf chain and to be prepended by a contiguous header.
1392 */
1393void
1394bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
1395{
1396	struct mbuf mb;
1397	struct bpf_d *d;
1398	u_int pktlen, slen;
1399	int gottime;
1400	struct timeval tv;
1401
1402	if (m->m_flags & M_SKIP_BPF) {
1403		m->m_flags &= ~M_SKIP_BPF;
1404		return;
1405	}
1406
1407	gottime = 0;
1408
1409	pktlen = m_length(m, NULL);
1410	/*
1411	 * Craft on-stack mbuf suitable for passing to bpf_filter.
1412	 * Note that we cut corners here; we only setup what's
1413	 * absolutely needed--this mbuf should never go anywhere else.
1414	 */
1415	mb.m_next = m;
1416	mb.m_data = data;
1417	mb.m_len = dlen;
1418	pktlen += dlen;
1419
1420	BPFIF_LOCK(bp);
1421	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1422		BPF_CHECK_DIRECTION(d, m)
1423			continue;
1424		BPFD_LOCK(d);
1425		++d->bd_rcount;
1426		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
1427		if (slen != 0) {
1428			d->bd_fcount++;
1429			if (!gottime) {
1430				microtime(&tv);
1431				gottime = 1;
1432			}
1433#ifdef MAC
1434			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
1435#endif
1436				catchpacket(d, (u_char *)&mb, pktlen, slen,
1437				    bpf_mcopy, &tv);
1438		}
1439		BPFD_UNLOCK(d);
1440	}
1441	BPFIF_UNLOCK(bp);
1442}
1443
1444#undef	BPF_CHECK_DIRECTION
1445
1446/*
1447 * Move the packet data from interface memory (pkt) into the
1448 * store buffer.  "cpfn" is the routine called to do the actual data
1449 * transfer.  bcopy is passed in to copy contiguous chunks, while
1450 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
1451 * pkt is really an mbuf.
1452 */
1453static void
1454catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
1455    void (*cpfn)(const void *, void *, size_t), struct timeval *tv)
1456{
1457	struct bpf_hdr *hp;
1458	int totlen, curlen;
1459	int hdrlen = d->bd_bif->bif_hdrlen;
1460	int do_wakeup = 0;
1461
1462	BPFD_LOCK_ASSERT(d);
1463	/*
1464	 * Figure out how many bytes to move.  If the packet is
1465	 * greater or equal to the snapshot length, transfer that
1466	 * much.  Otherwise, transfer the whole packet (unless
1467	 * we hit the buffer size limit).
1468	 */
1469	totlen = hdrlen + min(snaplen, pktlen);
1470	if (totlen > d->bd_bufsize)
1471		totlen = d->bd_bufsize;
1472
1473	/*
1474	 * Round up the end of the previous packet to the next longword.
1475	 */
1476	curlen = BPF_WORDALIGN(d->bd_slen);
1477	if (curlen + totlen > d->bd_bufsize) {
1478		/*
1479		 * This packet will overflow the storage buffer.
1480		 * Rotate the buffers if we can, then wakeup any
1481		 * pending reads.
1482		 */
1483		if (d->bd_fbuf == NULL) {
1484			/*
1485			 * We haven't completed the previous read yet,
1486			 * so drop the packet.
1487			 */
1488			++d->bd_dcount;
1489			return;
1490		}
1491		ROTATE_BUFFERS(d);
1492		do_wakeup = 1;
1493		curlen = 0;
1494	}
1495	else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
1496		/*
1497		 * Immediate mode is set, or the read timeout has
1498		 * already expired during a select call.  A packet
1499		 * arrived, so the reader should be woken up.
1500		 */
1501		do_wakeup = 1;
1502
1503	/*
1504	 * Append the bpf header.
1505	 */
1506	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
1507	hp->bh_tstamp = *tv;
1508	hp->bh_datalen = pktlen;
1509	hp->bh_hdrlen = hdrlen;
1510	/*
1511	 * Copy the packet data into the store buffer and update its length.
1512	 */
1513	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
1514	d->bd_slen = curlen + totlen;
1515
1516	if (do_wakeup)
1517		bpf_wakeup(d);
1518}
1519
1520/*
1521 * Initialize all nonzero fields of a descriptor.
1522 */
1523static void
1524bpf_allocbufs(struct bpf_d *d)
1525{
1526
1527	KASSERT(d->bd_fbuf == NULL, ("bpf_allocbufs: bd_fbuf != NULL"));
1528	KASSERT(d->bd_sbuf == NULL, ("bpf_allocbufs: bd_sbuf != NULL"));
1529	KASSERT(d->bd_hbuf == NULL, ("bpf_allocbufs: bd_hbuf != NULL"));
1530
1531	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1532	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1533	d->bd_slen = 0;
1534	d->bd_hlen = 0;
1535}
1536
1537/*
1538 * Free buffers currently in use by a descriptor.
1539 * Called on close.
1540 */
1541static void
1542bpf_freed(struct bpf_d *d)
1543{
1544	/*
1545	 * We don't need to lock out interrupts since this descriptor has
1546	 * been detached from its interface and it yet hasn't been marked
1547	 * free.
1548	 */
1549	if (d->bd_sbuf != NULL) {
1550		free(d->bd_sbuf, M_BPF);
1551		if (d->bd_hbuf != NULL)
1552			free(d->bd_hbuf, M_BPF);
1553		if (d->bd_fbuf != NULL)
1554			free(d->bd_fbuf, M_BPF);
1555	}
1556	if (d->bd_rfilter) {
1557		free((caddr_t)d->bd_rfilter, M_BPF);
1558#ifdef BPF_JITTER
1559		bpf_destroy_jit_filter(d->bd_bfilter);
1560#endif
1561	}
1562	if (d->bd_wfilter)
1563		free((caddr_t)d->bd_wfilter, M_BPF);
1564	mtx_destroy(&d->bd_mtx);
1565}
1566
1567/*
1568 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
1569 * fixed size of the link header (variable length headers not yet supported).
1570 */
1571void
1572bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
1573{
1574
1575	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
1576}
1577
1578/*
1579 * Attach an interface to bpf.  ifp is a pointer to the structure
1580 * defining the interface to be attached, dlt is the link layer type,
1581 * and hdrlen is the fixed size of the link header (variable length
1582 * headers are not yet supporrted).
1583 */
1584void
1585bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
1586{
1587	struct bpf_if *bp;
1588
1589	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
1590	if (bp == NULL)
1591		panic("bpfattach");
1592
1593	LIST_INIT(&bp->bif_dlist);
1594	bp->bif_ifp = ifp;
1595	bp->bif_dlt = dlt;
1596	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
1597	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
1598	*driverp = bp;
1599
1600	mtx_lock(&bpf_mtx);
1601	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
1602	mtx_unlock(&bpf_mtx);
1603
1604	/*
1605	 * Compute the length of the bpf header.  This is not necessarily
1606	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1607	 * that the network layer header begins on a longword boundary (for
1608	 * performance reasons and to alleviate alignment restrictions).
1609	 */
1610	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1611
1612	if (bootverbose)
1613		if_printf(ifp, "bpf attached\n");
1614}
1615
1616/*
1617 * Detach bpf from an interface.  This involves detaching each descriptor
1618 * associated with the interface, and leaving bd_bif NULL.  Notify each
1619 * descriptor as it's detached so that any sleepers wake up and get
1620 * ENXIO.
1621 */
1622void
1623bpfdetach(struct ifnet *ifp)
1624{
1625	struct bpf_if	*bp;
1626	struct bpf_d	*d;
1627
1628	/* Locate BPF interface information */
1629	mtx_lock(&bpf_mtx);
1630	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1631		if (ifp == bp->bif_ifp)
1632			break;
1633	}
1634
1635	/* Interface wasn't attached */
1636	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
1637		mtx_unlock(&bpf_mtx);
1638		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
1639		return;
1640	}
1641
1642	LIST_REMOVE(bp, bif_next);
1643	mtx_unlock(&bpf_mtx);
1644
1645	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
1646		bpf_detachd(d);
1647		BPFD_LOCK(d);
1648		bpf_wakeup(d);
1649		BPFD_UNLOCK(d);
1650	}
1651
1652	mtx_destroy(&bp->bif_mtx);
1653	free(bp, M_BPF);
1654}
1655
1656/*
1657 * Get a list of available data link type of the interface.
1658 */
1659static int
1660bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
1661{
1662	int n, error;
1663	struct ifnet *ifp;
1664	struct bpf_if *bp;
1665
1666	ifp = d->bd_bif->bif_ifp;
1667	n = 0;
1668	error = 0;
1669	mtx_lock(&bpf_mtx);
1670	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1671		if (bp->bif_ifp != ifp)
1672			continue;
1673		if (bfl->bfl_list != NULL) {
1674			if (n >= bfl->bfl_len) {
1675				mtx_unlock(&bpf_mtx);
1676				return (ENOMEM);
1677			}
1678			error = copyout(&bp->bif_dlt,
1679			    bfl->bfl_list + n, sizeof(u_int));
1680		}
1681		n++;
1682	}
1683	mtx_unlock(&bpf_mtx);
1684	bfl->bfl_len = n;
1685	return (error);
1686}
1687
1688/*
1689 * Set the data link type of a BPF instance.
1690 */
1691static int
1692bpf_setdlt(struct bpf_d *d, u_int dlt)
1693{
1694	int error, opromisc;
1695	struct ifnet *ifp;
1696	struct bpf_if *bp;
1697
1698	if (d->bd_bif->bif_dlt == dlt)
1699		return (0);
1700	ifp = d->bd_bif->bif_ifp;
1701	mtx_lock(&bpf_mtx);
1702	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1703		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
1704			break;
1705	}
1706	mtx_unlock(&bpf_mtx);
1707	if (bp != NULL) {
1708		opromisc = d->bd_promisc;
1709		bpf_detachd(d);
1710		bpf_attachd(d, bp);
1711		BPFD_LOCK(d);
1712		reset_d(d);
1713		BPFD_UNLOCK(d);
1714		if (opromisc) {
1715			error = ifpromisc(bp->bif_ifp, 1);
1716			if (error)
1717				if_printf(bp->bif_ifp,
1718					"bpf_setdlt: ifpromisc failed (%d)\n",
1719					error);
1720			else
1721				d->bd_promisc = 1;
1722		}
1723	}
1724	return (bp == NULL ? EINVAL : 0);
1725}
1726
1727static void
1728bpf_clone(void *arg, struct ucred *cred, char *name, int namelen,
1729    struct cdev **dev)
1730{
1731	int u;
1732
1733	if (*dev != NULL)
1734		return;
1735	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
1736		return;
1737	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
1738	    "bpf%d", u);
1739	dev_ref(*dev);
1740	(*dev)->si_flags |= SI_CHEAPCLONE;
1741	return;
1742}
1743
1744static void
1745bpf_drvinit(void *unused)
1746{
1747
1748	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
1749	LIST_INIT(&bpf_iflist);
1750	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
1751}
1752
1753static void
1754bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
1755{
1756
1757	bzero(d, sizeof(*d));
1758	BPFD_LOCK_ASSERT(bd);
1759	d->bd_immediate = bd->bd_immediate;
1760	d->bd_promisc = bd->bd_promisc;
1761	d->bd_hdrcmplt = bd->bd_hdrcmplt;
1762	d->bd_direction = bd->bd_direction;
1763	d->bd_feedback = bd->bd_feedback;
1764	d->bd_async = bd->bd_async;
1765	d->bd_rcount = bd->bd_rcount;
1766	d->bd_dcount = bd->bd_dcount;
1767	d->bd_fcount = bd->bd_fcount;
1768	d->bd_sig = bd->bd_sig;
1769	d->bd_slen = bd->bd_slen;
1770	d->bd_hlen = bd->bd_hlen;
1771	d->bd_bufsize = bd->bd_bufsize;
1772	d->bd_pid = bd->bd_pid;
1773	strlcpy(d->bd_ifname,
1774	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
1775	d->bd_locked = bd->bd_locked;
1776}
1777
1778static int
1779bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
1780{
1781	struct xbpf_d *xbdbuf, *xbd;
1782	int index, error;
1783	struct bpf_if *bp;
1784	struct bpf_d *bd;
1785
1786	/*
1787	 * XXX This is not technically correct. It is possible for non
1788	 * privileged users to open bpf devices. It would make sense
1789	 * if the users who opened the devices were able to retrieve
1790	 * the statistics for them, too.
1791	 */
1792	error = priv_check(req->td, PRIV_NET_BPF);
1793	if (error)
1794		return (error);
1795	if (req->oldptr == NULL)
1796		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
1797	if (bpf_bpfd_cnt == 0)
1798		return (SYSCTL_OUT(req, 0, 0));
1799	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
1800	mtx_lock(&bpf_mtx);
1801	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
1802		mtx_unlock(&bpf_mtx);
1803		free(xbdbuf, M_BPF);
1804		return (ENOMEM);
1805	}
1806	index = 0;
1807	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1808		BPFIF_LOCK(bp);
1809		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
1810			xbd = &xbdbuf[index++];
1811			BPFD_LOCK(bd);
1812			bpfstats_fill_xbpf(xbd, bd);
1813			BPFD_UNLOCK(bd);
1814		}
1815		BPFIF_UNLOCK(bp);
1816	}
1817	mtx_unlock(&bpf_mtx);
1818	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
1819	free(xbdbuf, M_BPF);
1820	return (error);
1821}
1822
/*
 * Register bpf_drvinit() (called with a NULL argument) under the
 * identifier "bpfdev" at SI_SUB_DRIVERS, SI_ORDER_MIDDLE.
 */
SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL)
1824
1825#else /* !DEV_BPF && !NETGRAPH_BPF */
1826/*
1827 * NOP stubs to allow bpf-using drivers to load and function.
1828 *
1829 * A 'better' implementation would allow the core bpf functionality
1830 * to be loaded at runtime.
1831 */
1832static struct bpf_if bp_null;
1833
1834void
1835bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
1836{
1837}
1838
/* Stub for kernels built without bpf: the mbuf chain is ignored. */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{

	(void)bp;
	(void)m;
}
1843
1844void
1845bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
1846{
1847}
1848
1849void
1850bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
1851{
1852
1853	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
1854}
1855
1856void
1857bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
1858{
1859
1860	*driverp = &bp_null;
1861}
1862
/* Stub for kernels built without bpf: there is nothing to detach. */
void
bpfdetach(struct ifnet *ifp)
{

	(void)ifp;
}
1867
1868u_int
1869bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
1870{
1871	return -1;	/* "no filter" behaviour */
1872}
1873
/*
 * Stub for kernels built without bpf: no filter program is ever reported
 * as valid, so this always returns 0 (false).
 */
int
bpf_validate(const struct bpf_insn *f, int len)
{

	(void)f;
	(void)len;
	return (0);		/* false */
}
1879
1880#endif /* !DEV_BPF && !NETGRAPH_BPF */
1881