/*-
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
 *
 * $FreeBSD: head/sys/net/bpf.c 149376 2005-08-22 19:35:48Z csjp $
 */

#include "opt_bpf.h"
#include "opt_mac.h"
#include "opt_netgraph.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/uio.h>

#include <sys/event.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/proc.h>

#include <sys/socket.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define PRINET  26			/* interruptible */

/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (e.g., 802.11
 * frames, Ethernet frames, etc.).
 */
static LIST_HEAD(, bpf_if)	bpf_iflist;
static struct mtx	bpf_mtx;		/* bpf global lock */
static int		bpf_bpfd_cnt;

static int	bpf_allocbufs(struct bpf_d *);
static void	bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
static void	bpf_detachd(struct bpf_d *d);
static void	bpf_freed(struct bpf_d *);
static void	bpf_mcopy(const void *, void *, size_t);
static int	bpf_movein(struct uio *, int, int,
		    struct mbuf **, struct sockaddr *, struct bpf_insn *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int,
		    u_int, void (*)(const void *, void *, size_t));
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static void	bpf_drvinit(void *);
static void	bpf_clone(void *, struct ucred *, char *, int, struct cdev **);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

/*
 * The default read buffer size is patchable.
 */
SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
static int bpf_bufsize = 4096;
SYSCTL_INT(_net_bpf, OID_AUTO, bufsize, CTLFLAG_RW,
    &bpf_bufsize, 0, "");
static int bpf_maxbufsize = BPF_MAXBUFSIZE;
SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
    &bpf_maxbufsize, 0, "");
static int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");

static	d_open_t	bpfopen;
static	d_close_t	bpfclose;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;
static	d_kqfilter_t	bpfkqfilter;

static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_NEEDGIANT,
	.d_open =	bpfopen,
	.d_close =	bpfclose,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};

static struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfdetach, filt_bpfread };

static int
bpf_movein(uio, linktype, mtu, mp, sockp, wfilter)
	struct uio *uio;
	int linktype;
	int mtu;
	struct mbuf **mp;
	struct sockaddr *sockp;
	struct bpf_insn *wfilter;
{
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_NULL:
		/*
		 * Null interface types require a 4-byte pseudo header that
		 * corresponds to the address family of the packet.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * The en(4) ATM driver requires a 4-byte ATM pseudo header.
		 * Though it isn't standard, the VPI:VCI needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;

	if (len - hlen > mtu)
		return (EMSGSIZE);

	if ((unsigned)len > MCLBYTES)
		return (EIO);

	if (len > MHLEN) {
		m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR);
	} else {
		MGETHDR(m, M_TRYWAIT, MT_DATA);
	}
	if (m == NULL)
		return (ENOBUFS);
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}

	error = uiomove(mtod(m, u_char *), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
	if (slen == 0) {
		error = EPERM;
		goto bad;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		bcopy(m->m_data, sockp->sa_data, hlen);
		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
#if BSD >= 199103
		m->m_data += hlen; /* XXX */
#else
		m->m_off += hlen;
#endif
	}

	return (0);
bad:
	m_freem(m);
	return (error);
}
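
/*
 * Illustrative userland sketch (compiled out; device and interface names
 * are assumptions made for the example): a write() to /dev/bpf lands in
 * bpf_movein() above.  For DLT_EN10MB the first ETHER_HDR_LEN bytes of
 * the buffer must be the Ethernet header; it is stripped off the mbuf
 * and copied into the sockaddr, so the frame below carries its own link
 * header.
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/bpf.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int
send_frame(const u_char *frame, size_t len)
{
	struct ifreq ifr;
	int fd;

	if ((fd = open("/dev/bpf0", O_RDWR)) < 0)
		return (-1);
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
	/* Bind to an interface first; otherwise bpfwrite() returns ENXIO. */
	if (ioctl(fd, BIOCSETIF, &ifr) < 0 ||
	    write(fd, frame, len) != (ssize_t)len) {
		close(fd);
		return (-1);
	}
	close(fd);
	return (0);
}
#endif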

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(d, bp)
	struct bpf_d *d;
	struct bpf_if *bp;
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	BPFIF_LOCK(bp);
	d->bd_bif = bp;
	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	bpf_bpfd_cnt++;
	*bp->bif_driverp = bp;
	BPFIF_UNLOCK(bp);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(d)
	struct bpf_d *d;
{
	int error;
	struct bpf_if *bp;
	struct ifnet *ifp;

	bp = d->bd_bif;
	BPFIF_LOCK(bp);
	BPFD_LOCK(d);
	ifp = d->bd_bif->bif_ifp;

	/*
	 * Remove d from the interface's descriptor list.
	 */
	LIST_REMOVE(d, bd_next);

	bpf_bpfd_cnt--;
	/*
	 * Let the driver know that there are no more listeners.
	 */
	if (LIST_EMPTY(&bp->bif_dlist))
		*bp->bif_driverp = NULL;

	d->bd_bif = NULL;
	BPFD_UNLOCK(d);
	BPFIF_UNLOCK(bp);

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		error = ifpromisc(ifp, 0);
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
				"bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
}

/*
 * Open the bpf device.  Returns ENXIO for an illegal minor device number,
 * EBUSY if the file is already open by another process.
 */
/* ARGSUSED */
static	int
bpfopen(dev, flags, fmt, td)
	struct cdev *dev;
	int flags;
	int fmt;
	struct thread *td;
{
	struct bpf_d *d;

	mtx_lock(&bpf_mtx);
	d = dev->si_drv1;
	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 */
	if (d != NULL) {
		mtx_unlock(&bpf_mtx);
		return (EBUSY);
	}
	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
	mtx_unlock(&bpf_mtx);

	if ((dev->si_flags & SI_NAMED) == 0)
		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
		    "bpf%d", dev2unit(dev));
	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	dev->si_drv1 = d;
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_seesent = 1;
	d->bd_pid = td->td_proc->p_pid;
	strlcpy(d->bd_pcomm, td->td_proc->p_comm, MAXCOMLEN);
#ifdef MAC
	mac_init_bpfdesc(d);
	mac_create_bpfdesc(td->td_ucred, d);
#endif
	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init(&d->bd_callout, NET_CALLOUT_MPSAFE);
	knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);

	return (0);
}
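
/*
 * Userland sketch (compiled out; the path format is an assumption made
 * for the example): since each minor can be opened by only one process,
 * capture programs typically probe /dev/bpf0, /dev/bpf1, ... until an
 * open() stops failing with EBUSY.
 */
#if 0
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>

static int
open_bpf(void)
{
	char dev[16];
	int fd, i;

	for (i = 0; i < 256; i++) {
		snprintf(dev, sizeof(dev), "/dev/bpf%d", i);
		fd = open(dev, O_RDWR);
		if (fd >= 0 || errno != EBUSY)
			return (fd);	/* an fd, or -1 on a hard error */
	}
	return (-1);
}
#endif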

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static	int
bpfclose(dev, flags, fmt, td)
	struct cdev *dev;
	int flags;
	int fmt;
	struct thread *td;
{
	struct bpf_d *d = dev->si_drv1;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	mtx_lock(&bpf_mtx);
	if (d->bd_bif)
		bpf_detachd(d);
	mtx_unlock(&bpf_mtx);
	selwakeuppri(&d->bd_sel, PRINET);
#ifdef MAC
	mac_destroy_bpfdesc(d);
#endif /* MAC */
	knlist_destroy(&d->bd_sel.si_note);
	bpf_freed(d);
	dev->si_drv1 = NULL;
	free(d, M_BPF);

	return (0);
}


/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;
/*
 *  bpfread - read next chunk of packets from buffers
 */
static	int
bpfread(dev, uio, ioflag)
	struct cdev *dev;
	struct uio *uio;
	int ioflag;
{
	struct bpf_d *d = dev->si_drv1;
	int timed_out;
	int error;

	/*
	 * Restrict the application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * One or more packets arrived either since the
			 * previous read or while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (ioflag & O_NONBLOCK) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
		     "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	BPFD_UNLOCK(d);

	return (error);
}
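
/*
 * Userland sketch (compiled out): bpfread() above rejects any read whose
 * size differs from the kernel buffer size, so callers query BIOCGBLEN
 * first and always read exactly that many bytes.
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <stdlib.h>
#include <unistd.h>

static ssize_t
read_packets(int fd, u_char **bufp)
{
	u_int blen;

	if (ioctl(fd, BIOCGBLEN, &blen) < 0)
		return (-1);
	if ((*bufp = malloc(blen)) == NULL)
		return (-1);
	/* Blocks until the hold buffer fills or the read timeout fires. */
	return (read(fd, *bufp, blen));
}
#endif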


/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(d)
	struct bpf_d *d;
{

	BPFD_LOCK_ASSERT(d);
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup(d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeuppri(&d->bd_sel, PRINET);
	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
}

static void
bpf_timed_out(arg)
	void *arg;
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	BPFD_UNLOCK(d);
}

static	int
bpfwrite(dev, uio, ioflag)
	struct cdev *dev;
	struct uio *uio;
	int ioflag;
{
	struct bpf_d *d = dev->si_drv1;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	struct sockaddr dst;

	if (d->bd_bif == NULL)
		return (ENXIO);

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0)
		return (ENETDOWN);

	if (uio->uio_resid == 0)
		return (0);

	bzero(&dst, sizeof(dst));
	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu,
	    &m, &dst, d->bd_wfilter);
	if (error)
		return (error);

	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

#ifdef MAC
	BPFD_LOCK(d);
	mac_create_mbuf_from_bpfdesc(d, m);
	BPFD_UNLOCK(d);
#endif
	NET_LOCK_GIANT();
	error = (*ifp->if_output)(ifp, m, &dst, NULL);
	NET_UNLOCK_GIANT();
	/*
	 * The driver frees the mbuf.
	 */
	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
static void
reset_d(d)
	struct bpf_d *d;
{

	mtx_assert(&d->bd_mtx, MA_OWNED);
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_fcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCSETWF		Set ethernet write filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 *  BIOCGSEESENT	Get "see packets sent" flag
 *  BIOCSSEESENT	Set "see packets sent" flag
 *  BIOCLOCK		Set "locked" flag
 */
/* ARGSUSED */
static	int
bpfioctl(dev, cmd, addr, flags, td)
	struct cdev *dev;
	u_long cmd;
	caddr_t addr;
	int flags;
	struct thread *td;
{
	struct bpf_d *d = dev->si_drv1;
	int error = 0;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	if (d->bd_locked == 1) {
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
			break;
		default:
			return (EPERM);
		}
	}
	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	case SIOCGIFADDR:
		{
			struct ifnet *ifp;

			if (d->bd_bif == NULL)
				error = EINVAL;
			else {
				ifp = d->bd_bif->bif_ifp;
				error = (*ifp->if_ioctl)(ifp, cmd, addr);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			mtx_lock(&Giant);
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			mtx_unlock(&Giant);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;

	/*
	 * Get current data link type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			strlcpy(ifr->ifr_name, ifp->if_xname,
			    sizeof(ifr->ifr_name));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag
	 */
	case BIOCGHDRCMPLT:
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCLOCK:
		d->bd_locked = 1;
		break;
	/*
	 * Set "header already complete" flag
	 */
	case BIOCSHDRCMPLT:
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:
		*(u_int *)addr = d->bd_seesent;
		break;

	/*
	 * Set "see sent packets" flag
	 */
	case BIOCSSEESENT:
		d->bd_seesent = *(u_int *)addr;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(&d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(&d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}
	return (error);
}
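
/*
 * Userland sketch (compiled out): a typical capture setup issues a short
 * sequence of the ioctls handled above -- bind the interface, enable
 * immediate mode, and optionally go promiscuous.
 */
#if 0
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/bpf.h>
#include <string.h>

static int
setup_capture(int fd, const char *ifname, int promisc)
{
	struct ifreq ifr;
	u_int one = 1;

	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
	if (ioctl(fd, BIOCSETIF, &ifr) < 0)
		return (-1);
	if (ioctl(fd, BIOCIMMEDIATE, &one) < 0)
		return (-1);
	/* BIOCPROMISC takes no argument. */
	if (promisc && ioctl(fd, BIOCPROMISC, NULL) < 0)
		return (-1);
	return (0);
}
#endif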

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
static int
bpf_setf(d, fp, cmd)
	struct bpf_d *d;
	struct bpf_program *fp;
	u_long cmd;
{
	struct bpf_insn *fcode, *old;
	u_int wfilter, flen, size;

	if (cmd == BIOCSETWF) {
		old = d->bd_wfilter;
		wfilter = 1;
	} else {
		wfilter = 0;
		old = d->bd_rfilter;
	}
	if (fp->bf_insns == NULL) {
		if (fp->bf_len != 0)
			return (EINVAL);
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = NULL;
		else
			d->bd_rfilter = NULL;
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
		return (0);
	}
	flen = fp->bf_len;
	if (flen > bpf_maxinsns)
		return (EINVAL);

	size = flen * sizeof(*fp->bf_insns);
	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = fcode;
		else
			d->bd_rfilter = fcode;
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);

		return (0);
	}
	free((caddr_t)fcode, M_BPF);
	return (EINVAL);
}
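
/*
 * Userland sketch (compiled out): installing a read filter via BIOCSETF,
 * which lands in bpf_setf() above.  This trivial program accepts every
 * packet in full; real programs are usually generated by libpcap.
 */
#if 0
#include <sys/ioctl.h>
#include <net/bpf.h>

static int
accept_all(int fd)
{
	/* A BPF_RET of (u_int)-1 means "capture the whole packet". */
	struct bpf_insn insns[] = {
		BPF_STMT(BPF_RET + BPF_K, (u_int)-1),
	};
	struct bpf_program prog = {
		.bf_len = sizeof(insns) / sizeof(insns[0]),
		.bf_insns = insns,
	};

	return (ioctl(fd, BIOCSETF, &prog));
}
#endif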

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(d, ifr)
	struct bpf_d *d;
	struct ifreq *ifr;
{
	struct bpf_if *bp;
	int error;
	struct ifnet *theywant;

	theywant = ifunit(ifr->ifr_name);
	if (theywant == NULL)
		return ENXIO;

	/*
	 * Look through attached interfaces for the named one.
	 */
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == NULL || ifp != theywant)
			continue;
		/* skip additional entry */
		if (bp->bif_driverp != &ifp->if_bpf)
			continue;

		mtx_unlock(&bpf_mtx);
		/*
		 * We found the requested interface.
		 * Allocate the packet buffers if we need to.
		 * If we're already attached to requested interface,
		 * just flush the buffer.
		 */
		if (d->bd_sbuf == NULL) {
			error = bpf_allocbufs(d);
			if (error != 0)
				return (error);
		}
		if (bp != d->bd_bif) {
			if (d->bd_bif)
				/*
				 * Detach if attached to something else.
				 */
				bpf_detachd(d);

			bpf_attachd(d, bp);
		}
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		return (0);
	}
	mtx_unlock(&bpf_mtx);
	/* Not found. */
	return (ENXIO);
}

/*
 * Support for select() and poll() system calls
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
static int
bpfpoll(dev, events, td)
	struct cdev *dev;
	int events;
	struct thread *td;
{
	struct bpf_d *d;
	int revents;

	d = dev->si_drv1;
	if (d->bd_bif == NULL)
		return (ENXIO);

	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	if (events & (POLLIN | POLLRDNORM)) {
		if (bpf_ready(d))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}

/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 */
int
bpfkqfilter(dev, kn)
	struct cdev *dev;
	struct knote *kn;
{
	struct bpf_d *d = (struct bpf_d *)dev->si_drv1;

	if (kn->kn_filter != EVFILT_READ)
		return (1);

	kn->kn_fop = &bpfread_filtops;
	kn->kn_hook = d;
	knlist_add(&d->bd_sel.si_note, kn, 0);

	return (0);
}
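
/*
 * Userland sketch (compiled out): registering the EVFILT_READ filter
 * accepted by bpfkqfilter() above and blocking until the descriptor
 * becomes readable.  Error handling and kqueue reuse are elided.
 */
#if 0
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

static int
wait_readable(int bpffd)
{
	struct kevent kev;
	int kq;

	if ((kq = kqueue()) < 0)
		return (-1);
	EV_SET(&kev, bpffd, EVFILT_READ, EV_ADD, 0, 0, NULL);
	/* Register the knote and wait for one event. */
	return (kevent(kq, &kev, 1, &kev, 1, NULL));
}
#endif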

static void
filt_bpfdetach(kn)
	struct knote *kn;
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	knlist_remove(&d->bd_sel.si_note, kn, 0);
}

static int
filt_bpfread(kn, hint)
	struct knote *kn;
	long hint;
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int ready;

	BPFD_LOCK_ASSERT(d);
	ready = bpf_ready(d);
	if (ready) {
		kn->kn_data = d->bd_slen;
		if (d->bd_hbuf)
			kn->kn_data += d->bd_hlen;
	}
	else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		callout_reset(&d->bd_callout, d->bd_rtout,
		    bpf_timed_out, d);
		d->bd_state = BPF_WAITING;
	}

	return (ready);
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
void
bpf_tap(bp, pkt, pktlen)
	struct bpf_if *bp;
	u_char *pkt;
	u_int pktlen;
{
	struct bpf_d *d;
	u_int slen;

	/*
	 * Lockless read to avoid cost of locking the interface if there are
	 * no descriptors attached.
	 */
	if (LIST_EMPTY(&bp->bif_dlist))
		return;

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			d->bd_fcount++;
#ifdef MAC
			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, pkt, pktlen, slen, bcopy);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}
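
/*
 * Kernel-side sketch (compiled out; drv_rx is a hypothetical driver
 * routine): receive paths call into bpf only while listeners exist,
 * using the if_bpf cookie that bpf_attachd()/bpf_detachd() maintain.
 */
#if 0
static void
drv_rx(struct ifnet *ifp, struct mbuf *m)
{
	/* if_bpf is non-NULL only while descriptors are attached. */
	if (ifp->if_bpf != NULL)
		bpf_mtap(ifp->if_bpf, m);
	/* ... then hand m to the network stack ... */
}
#endif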

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
static void
bpf_mcopy(src_arg, dst_arg, len)
	const void *src_arg;
	void *dst_arg;
	size_t len;
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, void *), dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
void
bpf_mtap(bp, m)
	struct bpf_if *bp;
	struct mbuf *m;
{
	struct bpf_d *d;
	u_int pktlen, slen;

	/*
	 * Lockless read to avoid cost of locking the interface if there are
	 * no descriptors attached.
	 */
	if (LIST_EMPTY(&bp->bif_dlist))
		return;

	pktlen = m_length(m, NULL);
	if (pktlen == m->m_len) {
		bpf_tap(bp, mtod(m, u_char *), pktlen);
		return;
	}

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
#ifdef MAC
			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_mcopy);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Incoming linkage from device drivers, when packet is in
 * an mbuf chain and to be prepended by a contiguous header.
 */
void
bpf_mtap2(bp, data, dlen, m)
	struct bpf_if *bp;
	void *data;
	u_int dlen;
	struct mbuf *m;
{
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;

	/*
	 * Lockless read to avoid cost of locking the interface if there are
	 * no descriptors attached.
	 */
	if (LIST_EMPTY(&bp->bif_dlist))
		return;

	pktlen = m_length(m, NULL);
	/*
	 * Craft on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only setup what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
#ifdef MAC
			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_mcopy);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(d, pkt, pktlen, snaplen, cpfn)
	struct bpf_d *d;
	u_char *pkt;
	u_int pktlen, snaplen;
	void (*cpfn)(const void *, void *, size_t);
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen = d->bd_bif->bif_hdrlen;
	int do_wakeup = 0;

	BPFD_LOCK_ASSERT(d);
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}
	else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	microtime(&hp->bh_tstamp);
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}
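
/*
 * Userland sketch (compiled out): walking the records that catchpacket()
 * lays down.  Each record is a struct bpf_hdr followed by bh_caplen bytes
 * of packet data, padded to the next longword with BPF_WORDALIGN().
 */
#if 0
#include <net/bpf.h>

static void
walk_records(u_char *buf, size_t nread,
    void (*cb)(const struct bpf_hdr *, const u_char *))
{
	u_char *p = buf;

	while (p < buf + nread) {
		const struct bpf_hdr *hp = (const struct bpf_hdr *)p;

		cb(hp, p + hp->bh_hdrlen);
		/* Advance to the next longword-aligned record. */
		p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
	}
}
#endif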

/*
 * Initialize all nonzero fields of a descriptor.
 */
static int
bpf_allocbufs(d)
	struct bpf_d *d;
{
	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
	if (d->bd_fbuf == NULL)
		return (ENOBUFS);

	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
	if (d->bd_sbuf == NULL) {
		free(d->bd_fbuf, M_BPF);
		return (ENOBUFS);
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	return (0);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(d)
	struct bpf_d *d;
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it hasn't yet been marked
	 * free.
	 */
	if (d->bd_sbuf != NULL) {
		free(d->bd_sbuf, M_BPF);
		if (d->bd_hbuf != NULL)
			free(d->bd_hbuf, M_BPF);
		if (d->bd_fbuf != NULL)
			free(d->bd_fbuf, M_BPF);
	}
	if (d->bd_rfilter)
		free((caddr_t)d->bd_rfilter, M_BPF);
	if (d->bd_wfilter)
		free((caddr_t)d->bd_wfilter, M_BPF);
	mtx_destroy(&d->bd_mtx);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 */
void
bpfattach(ifp, dlt, hdrlen)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
 */
void
bpfattach2(ifp, dlt, hdrlen, driverp)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
	struct bpf_if **driverp;
{
	struct bpf_if *bp;

	bp = (struct bpf_if *)malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
	if (bp == NULL)
		panic("bpfattach");

	LIST_INIT(&bp->bif_dlist);
	bp->bif_driverp = driverp;
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);

	mtx_lock(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
	mtx_unlock(&bpf_mtx);

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	if (bootverbose)
		if_printf(ifp, "bpf attached\n");
}
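
/*
 * Worked example of the bif_hdrlen computation above, using Ethernet
 * numbers and assuming SIZEOF_BPF_HDR == 18 (its 32-bit value):
 * hdrlen = 14, so BPF_WORDALIGN(14 + 18) - 14 = 32 - 14 = 18.  Each
 * record then starts with an 18-byte bpf header, the 14-byte link
 * header ends at offset 32, and the network layer header begins on a
 * longword boundary.
 */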

/*
 * Detach bpf from an interface.  This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL.  Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(ifp)
	struct ifnet *ifp;
{
	struct bpf_if	*bp;
	struct bpf_d	*d;

	/* Locate BPF interface information */
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (ifp == bp->bif_ifp)
			break;
	}

	/* Interface wasn't attached */
	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
		mtx_unlock(&bpf_mtx);
		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
		return;
	}

	LIST_REMOVE(bp, bif_next);
	mtx_unlock(&bpf_mtx);

	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
		bpf_detachd(d);
		BPFD_LOCK(d);
		bpf_wakeup(d);
		BPFD_UNLOCK(d);
	}

	mtx_destroy(&bp->bif_mtx);
	free(bp, M_BPF);
}

/*
 * Get a list of the available data link types for the interface.
 */
static int
bpf_getdltlist(d, bfl)
	struct bpf_d *d;
	struct bpf_dltlist *bfl;
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len) {
				mtx_unlock(&bpf_mtx);
				return (ENOMEM);
			}
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	mtx_unlock(&bpf_mtx);
	bfl->bfl_len = n;
	return (error);
}
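
/*
 * Userland sketch (compiled out): the usual two-call BIOCGDLTLIST
 * pattern.  With bfl_list == NULL the handler above only reports the
 * count, letting the caller size its array before fetching the DLTs.
 */
#if 0
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <stdlib.h>
#include <string.h>

static int
get_dlts(int fd, u_int **dlts, u_int *ndlt)
{
	struct bpf_dltlist bfl;

	memset(&bfl, 0, sizeof(bfl));
	if (ioctl(fd, BIOCGDLTLIST, &bfl) < 0)	/* count only */
		return (-1);
	if ((bfl.bfl_list = calloc(bfl.bfl_len, sizeof(u_int))) == NULL)
		return (-1);
	if (ioctl(fd, BIOCGDLTLIST, &bfl) < 0) {
		free(bfl.bfl_list);
		return (-1);
	}
	*dlts = bfl.bfl_list;
	*ndlt = bfl.bfl_len;
	return (0);
}
#endif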

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(d, dlt)
	struct bpf_d *d;
	u_int dlt;
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	mtx_unlock(&bpf_mtx);
	if (bp != NULL) {
		opromisc = d->bd_promisc;
		bpf_detachd(d);
		bpf_attachd(d, bp);
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		if (opromisc) {
			error = ifpromisc(bp->bif_ifp, 1);
			if (error)
				if_printf(bp->bif_ifp,
					"bpf_setdlt: ifpromisc failed (%d)\n",
					error);
			else
				d->bd_promisc = 1;
		}
	}
	return (bp == NULL ? EINVAL : 0);
}

static void
bpf_clone(arg, cred, name, namelen, dev)
	void *arg;
	struct ucred *cred;
	char *name;
	int namelen;
	struct cdev **dev;
{
	int u;

	if (*dev != NULL)
		return;
	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
		return;
	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", u);
	dev_ref(*dev);
	(*dev)->si_flags |= SI_CHEAPCLONE;
	return;
}

static void
bpf_drvinit(unused)
	void *unused;
{

	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
	LIST_INIT(&bpf_iflist);
	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
}

static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{

	bzero(d, sizeof(*d));
	BPFD_LOCK_ASSERT(bd);
	d->bd_immediate = bd->bd_immediate;
	d->bd_promisc = bd->bd_promisc;
	d->bd_hdrcmplt = bd->bd_hdrcmplt;
	d->bd_seesent = bd->bd_seesent;
	d->bd_async = bd->bd_async;
	d->bd_rcount = bd->bd_rcount;
	d->bd_dcount = bd->bd_dcount;
	d->bd_fcount = bd->bd_fcount;
	d->bd_sig = bd->bd_sig;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname,
	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
	strlcpy(d->bd_pcomm, bd->bd_pcomm, MAXCOMLEN);
	d->bd_locked = bd->bd_locked;
}

static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct xbpf_d *xbdbuf, *xbd;
	int index, error;
	struct bpf_if *bp;
	struct bpf_d *bd;

	/*
	 * XXX This is not technically correct.  It is possible for
	 * non-privileged users to open bpf devices.  It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = suser(req->td);
	if (error)
		return (error);
	if (req->oldptr == NULL)
		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
	if (bpf_bpfd_cnt == 0)
		return (SYSCTL_OUT(req, 0, 0));
	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
	mtx_lock(&bpf_mtx);
	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
		mtx_unlock(&bpf_mtx);
		free(xbdbuf, M_BPF);
		return (ENOMEM);
	}
	index = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
	}
	mtx_unlock(&bpf_mtx);
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}
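
/*
 * Userland sketch (compiled out; the header providing struct xbpf_d is
 * assumed to be <net/bpfdesc.h>): reading the net.bpf.stats node
 * exported above with the usual size-probe-then-fetch sysctl dance.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>
#include <stdlib.h>

static struct xbpf_d *
get_bpf_stats(size_t *ndesc)
{
	struct xbpf_d *xbd;
	size_t len;

	if (sysctlbyname("net.bpf.stats", NULL, &len, NULL, 0) < 0)
		return (NULL);
	if ((xbd = malloc(len)) == NULL)
		return (NULL);
	if (sysctlbyname("net.bpf.stats", xbd, &len, NULL, 0) < 0) {
		free(xbd);
		return (NULL);
	}
	*ndesc = len / sizeof(*xbd);
	return (xbd);
}
#endif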

SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL)

#else /* !DEV_BPF && !NETGRAPH_BPF */
/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */

void
bpf_tap(bp, pkt, pktlen)
	struct bpf_if *bp;
	u_char *pkt;
	u_int pktlen;
{
}

void
bpf_mtap(bp, m)
	struct bpf_if *bp;
	struct mbuf *m;
{
}

void
bpf_mtap2(bp, d, l, m)
	struct bpf_if *bp;
	void *d;
	u_int l;
	struct mbuf *m;
{
}

void
bpfattach(ifp, dlt, hdrlen)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
{
}

void
bpfattach2(ifp, dlt, hdrlen, driverp)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
	struct bpf_if **driverp;
{
}

void
bpfdetach(ifp)
	struct ifnet *ifp;
{
}

u_int
bpf_filter(pc, p, wirelen, buflen)
	const struct bpf_insn *pc;
	u_char *p;
	u_int wirelen;
	u_int buflen;
{
	return -1;	/* "no filter" behaviour */
}

int
bpf_validate(f, len)
	const struct bpf_insn *f;
	int len;
{
	return 0;		/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */